Jan Beulich
2007-Aug-09 12:20 UTC
[Xen-devel] [PATCH] vmx: last branch recording MSR emulation
.. to have feature parity with SVM.

This required adding infrastructure to make use of VMX's MSR save/
restore feature as well as making the MSR intercept bitmap per-VM.

(Applies cleanly only on top of the previously sent SVM/EFER and
HVM/CPUID patches.)

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmcs.c	2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c	2007-08-08 11:46:40.000000000 +0200
@@ -163,6 +163,10 @@ static void vmx_init_vmcs_config(void)
 
     /* Require Write-Back (WB) memory type for VMCS accesses. */
     BUG_ON(((vmx_msr_high >> 18) & 15) != 6);
+
+    rdmsr(MSR_IA32_VMX_MISC, vmx_msr_low, vmx_msr_high);
+    /* 16-byte entries in 512-entry steps */
+    vmx_msr_max_order = ((vmx_msr_low >> 25) & 7) + 13 - PAGE_SHIFT;
 }
 
 static struct vmcs_struct *vmx_alloc_vmcs(void)
@@ -378,7 +382,7 @@ static void vmx_set_host_env(struct vcpu
 
 #define GUEST_SEGMENT_LIMIT 0xffffffff
 
-static void construct_vmcs(struct vcpu *v)
+static int construct_vmcs(struct vcpu *v)
 {
     unsigned long cr0, cr4;
     union vmcs_arbytes arbytes;
@@ -394,8 +398,22 @@ static void construct_vmcs(struct vcpu *
     if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
 
+    /* MSR access bitmap. */
     if ( cpu_has_vmx_msr_bitmap )
-        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
+    {
+        char *msr_bitmap = alloc_xenheap_page();
+
+        if ( msr_bitmap == NULL)
+            return -ENOMEM;
+        memset(msr_bitmap, ~0, PAGE_SIZE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP);
+        vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP);
+        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+    }
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
@@ -427,10 +445,8 @@ static void construct_vmcs(struct vcpu *
     __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
 
     /* MSR intercepts. */
-    __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
-    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
     __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
     __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
 
     __vmwrite(VM_ENTRY_INTR_INFO, 0);
@@ -537,6 +553,131 @@ static void construct_vmcs(struct vcpu *
     paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
 
     vmx_vlapic_msr_changed(v);
+
+    return 0;
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+        {
+            *val = msr_area[i].data;
+            return 0;
+        }
+
+    return -ESRCH;
+}
+
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
+{
+    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+        {
+            msr_area[i].data = val;
+            return 0;
+        }
+
+    return -ESRCH;
+}
+
+int vmx_add_guest_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    if ( v->arch.hvm_vmx.msr_order < order )
+    {
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( v->arch.hvm_vmx.msr_order )
+        {
+            memcpy(msr_area,
+                   v->arch.hvm_vmx.msr_area,
+                   msr_count * sizeof(*msr_area));
+            free_xenheap_pages(v->arch.hvm_vmx.msr_area,
+                               v->arch.hvm_vmx.msr_order);
+        }
+#ifdef __i386__
+        else
+        {
+            __vmwrite(VM_EXIT_MSR_STORE_ADDR_HIGH, 0);
+            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR_HIGH, 0);
+        }
+#endif
+        v->arch.hvm_vmx.msr_area = msr_area;
+        v->arch.hvm_vmx.msr_order = order;
+        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
+        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz = 0;
+    msr_area[msr_count].data = 0;
+    v->arch.hvm_vmx.msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
+}
+
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.host_msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if (msr_area[i].index == msr)
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    if ( v->arch.hvm_vmx.host_msr_order < order )
+    {
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( v->arch.hvm_vmx.host_msr_order )
+        {
+            memcpy(msr_area,
+                   v->arch.hvm_vmx.host_msr_area,
+                   msr_count * sizeof(*msr_area));
+            free_xenheap_pages(v->arch.hvm_vmx.host_msr_area,
+                               v->arch.hvm_vmx.host_msr_order);
+        }
+#ifdef __i386__
+        else
+            __vmwrite(VM_EXIT_MSR_LOAD_ADDR_HIGH, 0);
+#endif
+        v->arch.hvm_vmx.host_msr_area = msr_area;
+        v->arch.hvm_vmx.host_msr_order = order;
+        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz = 0;
+    rdmsrl(msr, msr_area[msr_count].data);
+    v->arch.hvm_vmx.host_msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
 }
 
 int vmx_create_vmcs(struct vcpu *v)
Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmx.c	2007-08-08 11:45:25.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c	2007-08-08 11:56:05.000000000 +0200
@@ -53,7 +53,7 @@
 
 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
-char *vmx_msr_bitmap;
+unsigned int vmx_msr_max_order = 0;
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
@@ -1170,26 +1170,6 @@ static int vmx_event_pending(struct vcpu
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
 }
 
-static void disable_intercept_for_msr(u32 msr)
-{
-    /*
-     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-     * have the write-low and read-high bitmap offsets the wrong way round.
-     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-     */
-    if ( msr <= 0x1fff )
-    {
-        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
-        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
-    }
-    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
-    {
-        msr &= 0x1fff;
-        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
-        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
-    }
-}
-
 static struct hvm_function_table vmx_function_table = {
     .name = "VMX",
     .domain_initialise = vmx_domain_initialise,
@@ -1259,21 +1239,6 @@ void start_vmx(void)
     setup_vmcs_dump();
 
     hvm_enable(&vmx_function_table);
-
-    if ( cpu_has_vmx_msr_bitmap )
-    {
-        printk("VMX: MSR intercept bitmap enabled\n");
-        vmx_msr_bitmap = alloc_xenheap_page();
-        BUG_ON(vmx_msr_bitmap == NULL);
-        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
-
-        disable_intercept_for_msr(MSR_FS_BASE);
-        disable_intercept_for_msr(MSR_GS_BASE);
-
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
-        disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
-    }
 }
 
 /*
@@ -1380,7 +1345,10 @@ static void vmx_do_cpuid(struct cpu_user
                    bitmaskof(X86_FEATURE_ACC));
 
         /* Unsupported for virtualised CPUs. */
-        ecx &= ~(bitmaskof(X86_FEATURE_PDCM));
+        ecx &= ~(bitmaskof(X86_FEATURE_PDCM) |
+                 bitmaskof(X86_FEATURE_DSCPL));
+
+        edx &= ~bitmaskof(X86_FEATURE_DTES);
 
         break;
 
@@ -2572,6 +2540,82 @@ static int vmx_cr_access(unsigned long e
     return 1;
 }
 
+static const struct lbr_info {
+    u32 base, count;
+} p4_lbr[] = {
+    { MSR_P4_LER_FROM_LIP, 1 },
+    { MSR_P4_LER_TO_LIP, 1 },
+    { MSR_P4_LASTBRANCH_TOS, 1 },
+    { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+}, c2_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP, 1 },
+    { MSR_IA32_LASTINTTOIP, 1 },
+    { MSR_P6_LASTBRANCH_TOS, 1 },
+    { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+#ifdef __i386__
+}, pm_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP, 1 },
+    { MSR_IA32_LASTINTTOIP, 1 },
+    { MSR_P6_LASTBRANCH_TOS, 1 },
+    { MSR_PM_LASTBRANCH_0, NUM_MSR_PM_LASTBRANCH },
+    { 0, 0 }
+#endif
+};
+
+static const struct lbr_info *last_branch_msr_get(void)
+{
+    switch ( boot_cpu_data.x86 )
+    {
+    case 6:
+        switch ( boot_cpu_data.x86_model )
+        {
+#ifdef __i386__
+        /* PentiumM */
+        case 9: case 13:
+        /* Core Solo/Duo */
+        case 14:
+            return pm_lbr;
+            break;
+#endif
+        /* Core2 Duo */
+        case 15:
+            return c2_lbr;
+            break;
+        }
+        break;
+
+    case 15:
+        switch ( boot_cpu_data.x86_model )
+        {
+        /* Pentium4/Xeon with em64t */
+        case 3: case 4: case 6:
+            return p4_lbr;
+            break;
+        }
+        break;
+    }
+
+    return NULL;
+}
+
+static int last_branch_msr(u32 ecx)
+{
+    const struct lbr_info *lbr = last_branch_msr_get();
+
+    if ( lbr != NULL )
+    {
+        for ( ; lbr->count; ++lbr )
+            if ( ecx >= lbr->base && ecx < lbr->base + lbr->count )
+                return 1;
+    }
+
+    return 0;
+}
+
 static int vmx_do_msr_read(struct cpu_user_regs *regs)
 {
     u64 msr_content = 0;
@@ -2597,6 +2641,10 @@ static int vmx_do_msr_us
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0)
+            msr_content = 0;
+        break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     case MSR_IA32_MCG_STATUS:
@@ -2610,6 +2658,15 @@ static int vmx_do_msr_us
         msr_content = 0;
         break;
     default:
+        if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0)
+            break;
+
+        if ( last_branch_msr(ecx) )
+        {
+            msr_content = 0;
+            break;
+        }
+
         switch ( long_mode_do_msr_read(regs) )
         {
         case HNDL_unhandled:
@@ -2736,13 +2793,50 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_DEBUGCTLMSR:
+        if ( msr_content & ~3 )
+            break;
+        if ( msr_content )
+        {
+            int rc = 0;
+
+            if ( msr_content & 1 )
+            {
+                const struct lbr_info *lbr = last_branch_msr_get();
+
+                if ( lbr == NULL )
+                    break;
+                for ( ; rc == 0 && lbr->count; ++lbr )
+                {
+                    u32 i;
+
+                    for ( i = 0; rc == 0 && i < lbr->count; ++i )
+                    {
+                        rc = vmx_add_guest_msr(v, lbr->base + i);
+                        if ( rc == 0 && cpu_has_vmx_msr_bitmap )
+                            vmx_disable_intercept_for_msr(v->arch.hvm_vmx.msr_bitmap,
+                                                          lbr->base + i);
+                    }
+                }
+            }
+
+            if ( rc < 0 ||
+                 vmx_add_guest_msr(v, ecx) < 0 ||
+                 vmx_add_host_load_msr(v, ecx) < 0)
+                vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+            else
+                vmx_write_guest_msr(v, ecx, msr_content);
+        }
+        break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         switch ( long_mode_do_msr_write(regs) )
         {
         case HNDL_unhandled:
-            wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
+            if ( vmx_write_guest_msr(v, ecx, msr_content) != 0 &&
+                 !last_branch_msr(ecx) )
+                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
             break;
         case HNDL_exception_raised:
             return 0;
Index: 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/hvm/vmx/vmcs.h	2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h	2007-08-08 11:45:33.000000000 +0200
@@ -35,6 +35,13 @@ struct vmcs_struct {
     unsigned char data [0]; /* vmcs size is read from MSR */
 };
 
+struct vmx_msr_entry {
+    u32 index;
+    u32 mbz;
+    u64 data;
+};
+extern unsigned int vmx_msr_max_order;
+
 enum {
     VMX_INDEX_MSR_LSTAR = 0,
     VMX_INDEX_MSR_STAR,
@@ -79,6 +86,14 @@ struct arch_vmx_struct {
 #endif
     unsigned long efer;
 
+    char *msr_bitmap;
+    unsigned int msr_order;
+    unsigned int msr_count;
+    struct vmx_msr_entry *msr_area;
+    unsigned int host_msr_order;
+    unsigned int host_msr_count;
+    struct vmx_msr_entry *host_msr_area;
+
     /* Following fields are all specific to vmxassist. */
     unsigned long vmxassist_enabled:1;
     unsigned long irqbase_mode:1;
@@ -137,7 +152,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
     (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
-extern char *vmx_msr_bitmap;
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI 0x00000001
@@ -274,6 +288,31 @@ enum vmcs_field {
     HOST_RIP = 0x00006c16,
 };
 
+static inline void vmx_disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+    /*
+     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+     * have the write-low and read-high bitmap offsets the wrong way round.
+     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+     */
+    if ( msr <= 0x1fff )
+    {
+        __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
+        __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
+        __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
+    }
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
+int vmx_add_guest_msr(struct vcpu *v, u32 msr);
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
Index: 2007-08-08/xen/include/asm-x86/msr.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/msr.h	2007-08-08 11:43:53.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/msr.h	2007-08-08 11:45:33.000000000 +0200
@@ -200,6 +200,13 @@ static inline void write_efer(__u64 val)
 #define MSR_P6_EVNTSEL0 0x186
 #define MSR_P6_EVNTSEL1 0x187
 
+#define MSR_P6_LASTBRANCH_TOS 0x1c9
+#define MSR_PM_LASTBRANCH_0 0x40
+#define NUM_MSR_PM_LASTBRANCH 8
+#define MSR_C2_LASTBRANCH_0_FROM_IP 0x40
+#define MSR_C2_LASTBRANCH_0_TO_IP 0x60
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
+
 #define MSR_IA32_PERF_STATUS 0x198
 #define MSR_IA32_PERF_CTL 0x199
 
@@ -223,6 +230,8 @@ static inline void write_efer(__u64 val)
 #define MSR_IA32_MC0_ADDR 0x402
 #define MSR_IA32_MC0_MISC 0x403
 
+#define MSR_IA32_DS_AREA 0x600
+
 /* K8 Machine Check MSRs */
 #define MSR_K8_MC1_CTL 0x404
 #define MSR_K8_MC1_STATUS 0x405
@@ -333,6 +342,15 @@ static inline void write_efer(__u64 val)
 #define MSR_P4_U2L_ESCR0 0x3b0
 #define MSR_P4_U2L_ESCR1 0x3b1
 
+#define MSR_P4_LER_FROM_LIP 0x1d7
+#define MSR_P4_LER_TO_LIP 0x1d8
+#define MSR_P4_LASTBRANCH_TOS 0x1da
+#define MSR_P4_LASTBRANCH_0 0x1db
+#define NUM_MSR_P4_LASTBRANCH 4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP 0x680
+#define MSR_P4_LASTBRANCH_0_TO_LIP 0x6c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO 16
+
 #define MSR_K6_WHCR 0xC0000082
 #define MSR_K6_UWCCR 0xC0000085
 #define MSR_K6_EPMR 0xC0000086
Li, Xin B
2007-Aug-09 12:25 UTC
RE: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
We don't want to use this feature currently because _each_
vmentry/vmexit will have to do additional msr operations.
-Xin

>-----Original Message-----
>From: xen-devel-bounces@lists.xensource.com
>[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Jan Beulich
>Sent: Thursday, August 09, 2007 8:21 PM
>To: xen-devel@lists.xensource.com
>Subject: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
>
>.. to have feature parity with SVM.
>
>This required adding infrastructure to make use of VMX's MSR save/
>restore feature as well as making the MSR intercept bitmap per-VM.
>
>[remainder of quoted patch trimmed; see the original post above]
Keir Fraser
2007-Aug-09 12:44 UTC
Re: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
I suspect that it only gets enabled if the guest touches the debug-control
MSR. Nonetheless, the patch is rather complicated. It'll need some review.

 -- Keir

On 9/8/07 13:25, "Li, Xin B" <xin.b.li@intel.com> wrote:

> We don't want to use this feature currently because _each_
> vmentry/vmexit will have to do additional msr operations.
> -Xin
>
>> [quoted patch trimmed; see the original post above]
Jan Beulich
2007-Aug-09 12:47 UTC
RE: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
>>> "Li, Xin B" <xin.b.li@intel.com> 09.08.07 14:25 >>>
>We don't want to use this feature currently because _each_
>vmentry/vmexit will have to do additional msr operations.

Then why would hardware supply the feature? Also, as long as the guest
doesn't use the feature, no extra saves/restores are needed.

Further, since there is no feature detection mechanism (other than
family:model) for a guest to determine whether to make use of that
functionality, a guest trying to do so would likely misbehave (most likely
crash on the attempt to write a non-zero value to DebugCtlMSR). Certainly,
there are more things like this that can happen, but for this specific
part I'm about to submit a Linux patch to make use of the functionality -
such kernels would then no longer work under HVM.

Finally, with LBR registers being used in Xen itself (optionally), you'd
expose hypervisor-internal information to HVM guests, which is generally
considered a security risk.

Jan
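As a concrete illustration of the family:model dependence mentioned above,
here is a minimal user-space sketch of the kind of check a guest (or the
hypervisor, as in last_branch_msr_get() in the patch) has to rely on. The
model lists mirror those in the patch; the helper itself, its use of GCC's
<cpuid.h>, and the base-family-only decoding are assumptions for the sake
of the example, not part of the patch:

/* Sketch only: detect LBR-capable CPUs by family/model, as the patch does. */
#include <stdio.h>
#include <cpuid.h>   /* GCC wrapper around the CPUID instruction */

static int lbr_msrs_expected(void)
{
    unsigned int eax, ebx, ecx, edx, family, model;

    if ( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
        return 0;

    family = (eax >> 8) & 0xf;                          /* base family only */
    model  = ((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0); /* incl. ext. model */

    if ( family == 6 )   /* Pentium M / Core Solo-Duo / Core 2 */
        return model == 9 || model == 13 || model == 14 || model == 15;
    if ( family == 15 )  /* Pentium 4 / Xeon with EM64T */
        return model == 3 || model == 4 || model == 6;
    return 0;
}

int main(void)
{
    printf("LBR MSRs expected: %s\n", lbr_msrs_expected() ? "yes" : "no");
    return 0;
}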
Keir Fraser
2007-Aug-09 12:49 UTC
Re: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
On 9/8/07 13:47, "Jan Beulich" <jbeulich@novell.com> wrote:

> Finally, with LBR registers being used in Xen itself (optionally), you'd
> expose hypervisor-internal information to HVM guests, which is generally
> considered a security risk.

Well, that's due to the current rather stupid policy of defaulting HVM MSR
reads to read the native MSR. MSR handling needs unifying and a big clean-up,
just like has now happened to the control registers. The same goes for CPUID
(which has a similarly stupid policy to that of MSR reads).

 -- Keir
Jan Beulich
2007-Aug-09 13:05 UTC
Re: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
>>> Keir Fraser <keir@xensource.com> 09.08.07 14:49 >>>
>Well, that's due to the current rather stupid policy of defaulting HVM MSR
>reads to read the native MSR. MSR handling needs unifying and a big clean-up,
>just like has now happened to the control registers. The same goes for CPUID
>(which has a similarly stupid policy to that of MSR reads).

I've been hearing you say this for quite a while, and from an abstract point
of view I would agree. However, unlike the control registers, MSRs and CPUID
carry quite a bit of vendor-specific behaviour, and hence I'd be afraid that
unification here would not result in much better code. (An example of things
done incorrectly on both sides is the recent insertion of MSR_K8_* cases in
VMX code - how would an Intel CPU ever have K8-specific MSRs?)

The default of reading native registers is of course very questionable, but
it has been that way for so long that I didn't dare to kill it, as I suspect
that booting some, if not all, OSes without it would not work. And the
incremental, empirical addition of emulation for individual MSRs that would
likely result is something I consider, as pointed out on other similar
occasions like the MMIO instruction decoder, very wrong for code that is no
longer in a proof-of-concept state.

Jan
Keir Fraser
2007-Aug-09 13:20 UTC
Re: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
On 9/8/07 14:05, "Jan Beulich" <jbeulich@novell.com> wrote:

> The default of reading native registers is of course very questionable, but
> it has been that way for so long that I didn't dare to kill it, as I suspect
> that booting some, if not all, OSes without it would not work. And the
> incremental, empirical addition of emulation for individual MSRs that would
> likely result is something I consider, as pointed out on other similar
> occasions like the MMIO instruction decoder, very wrong for code that is no
> longer in a proof-of-concept state.

Doing it the current way, you get bitten when host MSRs contain unexpected
values. This is why the MCE/MCA MSR handling got added -- some guests were
getting confused, and thus crashing, on one particular host type that we
hadn't tested on before. We're just storing up trouble for the future.

 -- Keir
Keir Fraser
2007-Oct-15 14:35 UTC
Re: [Xen-devel] [PATCH] vmx: last branch recording MSR emulation
On 9/8/07 13:20, "Jan Beulich" <jbeulich@novell.com> wrote:

> .. to have feature parity with SVM.
>
> This required adding infrastructure to make use of VMX's MSR save/
> restore feature as well as making the MSR intercept bitmap per-VM.

> +    rdmsr(MSR_IA32_VMX_MISC, vmx_msr_low, vmx_msr_high);
> +    /* 16-byte entries in 512-entry steps */
> +    vmx_msr_max_order = ((vmx_msr_low >> 25) & 7) + 13 - PAGE_SHIFT;

It's not clear to me from the reference manual how VMX_MISC[25:27] is supposed
to be interpreted. The manual claims in G.5 that if the value is N then the
maximum number of MSRs is (N+1)*512. Note that the formula is linear in N, not
exponential, so I think turning N into an order variable (vmx_msr_max_order)
is incorrect.

However, the manual then confuses me. In 20.7.2 it says "it is recommended
that [MSR-store] count not exceed 512 bytes". This doesn't tally with the
formula in G.5, which would imply that the smallest limit would be 512 *MSRs*,
not 512 bytes. I wonder if 20.7.2 is incorrect?

I'd like to get this latter point clarified (I cc'ed Xin Li at Intel to see if
he has any idea). If the smallest limit is indeed 512 MSRs then I see little
point in interrogating VMX_MISC at all, or including limit-checking code.
We're absolutely nowhere near pushing the limit of 512 MSRs. Also, that limit
requires 8kB of MSR data -- we can't really guarantee success of contiguous
multi-page allocations anyway (as you have seen), so actually we'd like to
keep to a 256-MSR limit for that reason.

 -- Keir
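For illustration, a small stand-alone sketch of the two readings of the N
field discussed above - the patch's order-based computation versus the
linear (N+1)*512 reading of G.5. It assumes 16-byte MSR entries and 4kB
pages; the program itself is not part of the patch or the manual:

/* Sketch only: where the order-based and linear interpretations diverge. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

int main(void)
{
    unsigned int n;

    for ( n = 0; n < 8; ++n )   /* the field is 3 bits wide */
    {
        /* Patch's reading: N selects a maximum page order for the MSR area. */
        unsigned int order = n + 13 - PAGE_SHIFT;
        unsigned int order_based_msrs = (PAGE_SIZE << order) / 16;

        /* G.5's reading: the maximum MSR count is simply (N+1)*512. */
        unsigned int linear_msrs = (n + 1) * 512;

        printf("N=%u: order-based limit %u MSRs, linear limit %u MSRs\n",
               n, order_based_msrs, linear_msrs);
    }
    return 0;
}

The two agree for N=0 and N=1 but diverge from N=2 onwards, which is the
point made above about the formula being linear rather than exponential.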