Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 00/11] nested vmx: bug fixes and feature enabling
This series of patches contains some bug fixes and feature enabling for
nested VMX; please help to review and pull.

These patches do not affect Xen-on-Xen functionality, and in my opinion
there is no particular need to backport them to 4.2.x.

Changes from v1 to v2:
 - Use literal names instead of hard-coded numbers to express the
   default-1 settings in the VMX capability MSRs.
 - For the TRUE VMX MSRs, use the same values as the corresponding
   normal VMX MSRs.
 - Fix a coding style issue.

Thanks,
Dongxiao

Dongxiao Xu (11):
  nested vmx: emulate MSR bitmaps
  nested vmx: use literal name instead of hard numbers
  nested vmx: expose bit 55 of IA32_VMX_BASIC_MSR to guest VMM
  nested vmx: fix rflags status in virtual vmexit
  nested vmx: fix handling of RDTSC
  nested vmx: fix DR access VM exit
  nested vmx: enable IA32E mode while do VM entry
  nested vmx: enable "Virtualize APIC accesses" feature for L1 VMM
  nested vmx: enable PAUSE and RDPMC exiting for L1 VMM
  nested vmx: fix interrupt delivery to L2 guest
  nested vmx: check host ability when intercept MSR read

 xen/arch/x86/hvm/vmx/intr.c        |   11 ++-
 xen/arch/x86/hvm/vmx/vmcs.c        |   28 +++++++++
 xen/arch/x86/hvm/vmx/vmx.c         |    2 +-
 xen/arch/x86/hvm/vmx/vvmx.c        |  115 ++++++++++++++++++++++++++++------
 xen/include/asm-x86/hvm/vmx/vmcs.h |    1 +
 xen/include/asm-x86/hvm/vmx/vmx.h  |    2 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |   10 +++
 7 files changed, 145 insertions(+), 24 deletions(-)
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 01/11] nested vmx: emulate MSR bitmaps

To emulate MSR bitmaps under nested VMX, the L0 hypervisor traps every MSR
access from the L2 guest by disabling the MSR_BITMAP feature in the shadow
VMCS. When handling such a VM exit, L0 checks whether the L1 hypervisor has
enabled the MSR_BITMAP feature and whether the corresponding bit in L1's
bitmap is set to 1. If so, L0 injects the VM exit into the L1 hypervisor;
otherwise L0 handles the VM exit itself.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vmcs.c        |   28 +++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vvmx.c        |   39 ++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/vmx/vmcs.h |    1 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |    1 +
 4 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 0fbdd75..205e705 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -674,6 +674,34 @@ void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
 }
 
 /*
+ * access_type: read == 0, write == 1
+ */
+int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type)
+{
+    int ret = 1;
+    if ( !msr_bitmap )
+        return 1;
+
+    if ( msr <= 0x1fff )
+    {
+        if ( access_type == 0 )
+            ret = test_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
+        else if ( access_type == 1 )
+            ret = test_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        if ( access_type == 0 )
+            ret = test_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
+        else if ( access_type == 1 )
+            ret = test_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
+    }
+    return ret;
+}
+
+
+/*
  * Switch VMCS between layer 1 & 2 guest
  */
 void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to)
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index ed47780..719bfce 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -48,6 +48,7 @@ int nvmx_vcpu_initialise(struct vcpu *v)
     nvmx->intr.error_code = 0;
     nvmx->iobitmap[0] = NULL;
     nvmx->iobitmap[1] = NULL;
+    nvmx->msrbitmap = NULL;
     return 0;
  out:
     return -ENOMEM;
@@ -561,6 +562,17 @@ static void __clear_current_vvmcs(struct vcpu *v)
         __vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx));
 }
 
+static void __map_msr_bitmap(struct vcpu *v)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+    unsigned long gpa;
+
+    if ( nvmx->msrbitmap )
+        hvm_unmap_guest_frame (nvmx->msrbitmap);
+    gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, MSR_BITMAP);
+    nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT);
+}
+
 static void __map_io_bitmap(struct vcpu *v, u64 vmcs_reg)
 {
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
@@ -597,6 +609,10 @@ static void nvmx_purge_vvmcs(struct vcpu *v)
             nvmx->iobitmap[i] = NULL;
         }
     }
+    if ( nvmx->msrbitmap ) {
+        hvm_unmap_guest_frame(nvmx->msrbitmap);
+        nvmx->msrbitmap = NULL;
+    }
 }
 
 u64 nvmx_get_tsc_offset(struct vcpu *v)
@@ -1153,6 +1169,7 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
         nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
         nvcpu->nv_vvmcxaddr = gpa;
         map_io_bitmap_all (v);
+        __map_msr_bitmap(v);
     }
 
     vmreturn(regs, VMSUCCEED);
@@ -1270,6 +1287,9 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
          vmcs_encoding == IO_BITMAP_B_HIGH )
         __map_io_bitmap (v, IO_BITMAP_B);
 
+    if ( vmcs_encoding == MSR_BITMAP || vmcs_encoding == MSR_BITMAP_HIGH )
+        __map_msr_bitmap(v);
+
     vmreturn(regs, VMSUCCEED);
     return X86EMUL_OKAY;
 }
@@ -1320,6 +1340,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
                CPU_BASED_RDTSC_EXITING |
                CPU_BASED_MONITOR_TRAP_FLAG |
                CPU_BASED_VIRTUAL_NMI_PENDING |
+               CPU_BASED_ACTIVATE_MSR_BITMAP |
               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
         /* bit 1, 4-6,8,13-16,26 must be 1 (refer G4 of SDM) */
         tmp = ( (1<<26) | (0xf << 13) | 0x100 | (0x7 << 4) | 0x2);
@@ -1497,8 +1518,6 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
     case EXIT_REASON_TRIPLE_FAULT:
     case EXIT_REASON_TASK_SWITCH:
     case EXIT_REASON_CPUID:
-    case EXIT_REASON_MSR_READ:
-    case EXIT_REASON_MSR_WRITE:
     case EXIT_REASON_VMCALL:
     case EXIT_REASON_VMCLEAR:
     case EXIT_REASON_VMLAUNCH:
@@ -1514,6 +1533,22 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
         /* inject to L1 */
         nvcpu->nv_vmexit_pending = 1;
         break;
+    case EXIT_REASON_MSR_READ:
+    case EXIT_REASON_MSR_WRITE:
+    {
+        int status;
+        ctrl = __n2_exec_control(v);
+        if ( ctrl & CPU_BASED_ACTIVATE_MSR_BITMAP )
+        {
+            status = vmx_check_msr_bitmap(nvmx->msrbitmap, regs->ecx,
+                         !!(exit_reason == EXIT_REASON_MSR_WRITE));
+            if ( status )
+                nvcpu->nv_vmexit_pending = 1;
+        }
+        else
+            nvcpu->nv_vmexit_pending = 1;
+        break;
+    }
     case EXIT_REASON_IO_INSTRUCTION:
         ctrl = __n2_exec_control(v);
         if ( ctrl & CPU_BASED_ACTIVATE_IO_BITMAP )
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index cc92f69..14ac773 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -427,6 +427,7 @@ int vmx_add_host_load_msr(u32 msr);
 void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to);
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
+int vmx_check_msr_bitmap(unsigned long *msr_bitmap, u32 msr, int access_type);
 
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h
index b9137b8..067fbe4 100644
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -26,6 +26,7 @@
 struct nestedvmx {
     paddr_t    vmxon_region_pa;
     void       *iobitmap[2];	/* map (va) of L1 guest I/O bitmap */
+    void       *msrbitmap;	/* map (va) of L1 guest MSR bitmap */
     /* deferred nested interrupt */
     struct {
         unsigned long intr_info;
-- 
1.7.1
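[For readers unfamiliar with the layout vmx_check_msr_bitmap() indexes: the MSR bitmap is a 4 KiB page split into four 1 KiB quadrants per the Intel SDM (read-low at 0x000, read-high at 0x400, write-low at 0x800, write-high at 0xc00), one bit per MSR, where a set bit means "this access causes a VM exit". A minimal standalone restatement of the lookup is sketched below; msr_bitmap_wants_exit is a hypothetical name, not part of the patch.]

    #include <stdint.h>

    /* Sketch only: return nonzero if the given MSR access should exit,
     * following the same quadrant layout as vmx_check_msr_bitmap(). */
    static int msr_bitmap_wants_exit(const uint8_t bitmap[4096],
                                     uint32_t msr, int is_write)
    {
        unsigned int base;

        if ( msr <= 0x1fff )                    /* "low" MSR range */
            base = is_write ? 0x800 : 0x000;
        else if ( msr >= 0xc0000000 && msr <= 0xc0001fff )  /* "high" range */
        {
            msr &= 0x1fff;
            base = is_write ? 0xc00 : 0x400;
        }
        else
            return 1;                           /* out-of-range MSRs always exit */

        return (bitmap[base + msr / 8] >> (msr % 8)) & 1;
    }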
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 02/11] nested vmx: use literal name instead of hard numbers
For the default-1 settings in the VMX capability MSRs, use literal names
instead of hard-coded numbers in the code. Besides that, fix the default-1
settings for the pin-based control MSR.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c        |   15 ++++++---------
 xen/include/asm-x86/hvm/vmx/vvmx.h |    9 +++++++++
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 719bfce..4d0f26b 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1318,9 +1318,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         data = PIN_BASED_EXT_INTR_MASK |
                PIN_BASED_NMI_EXITING |
                PIN_BASED_PREEMPT_TIMER;
-        data <<= 32;
-        /* 0-settings */
-        data |= 0;
+        tmp = VMX_PINBASED_CTLS_DEFAULT1;
+        data = ((data | tmp) << 32) | (tmp);
         break;
     case MSR_IA32_VMX_PROCBASED_CTLS:
         /* 1-seetings */
@@ -1342,8 +1341,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
                CPU_BASED_VIRTUAL_NMI_PENDING |
                CPU_BASED_ACTIVATE_MSR_BITMAP |
                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
-        /* bit 1, 4-6,8,13-16,26 must be 1 (refer G4 of SDM) */
-        tmp = ( (1<<26) | (0xf << 13) | 0x100 | (0x7 << 4) | 0x2);
+        tmp = VMX_PROCBASED_CTLS_DEFAULT1;
         /* 0-settings */
         data = ((data | tmp) << 32) | (tmp);
         break;
@@ -1356,8 +1354,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         break;
     case MSR_IA32_VMX_EXIT_CTLS:
         /* 1-seetings */
-        /* bit 0-8, 10,11,13,14,16,17 must be 1 (refer G4 of SDM) */
-        tmp = 0x36dff;
+        tmp = VMX_EXIT_CTLS_DEFAULT1;
         data = VM_EXIT_ACK_INTR_ON_EXIT |
                VM_EXIT_IA32E_MODE |
                VM_EXIT_SAVE_PREEMPT_TIMER |
@@ -1370,8 +1367,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         data = ((data | tmp) << 32) | tmp;
         break;
     case MSR_IA32_VMX_ENTRY_CTLS:
-        /* bit 0-8, and 12 must be 1 (refer G5 of SDM) */
-        tmp = 0x11ff;
+        /* 1-seetings */
+        tmp = VMX_ENTRY_CTLS_DEFAULT1;
         data = VM_ENTRY_LOAD_GUEST_PAT |
                VM_ENTRY_LOAD_GUEST_EFER |
                VM_ENTRY_LOAD_PERF_GLOBAL_CTRL;
diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h
index 067fbe4..dce2cd8 100644
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -36,6 +36,15 @@ struct nestedvmx {
 
 #define vcpu_2_nvmx(v)	(vcpu_nestedhvm(v).u.nvmx)
 
+/* bit 1, 2, 4 must be 1 */
+#define VMX_PINBASED_CTLS_DEFAULT1	0x16
+/* bit 1, 4-6,8,13-16,26 must be 1 */
+#define VMX_PROCBASED_CTLS_DEFAULT1	0x401e172
+/* bit 0-8, 10,11,13,14,16,17 must be 1 */
+#define VMX_EXIT_CTLS_DEFAULT1	0x36dff
+/* bit 0-8, and 12 must be 1 */
+#define VMX_ENTRY_CTLS_DEFAULT1	0x11ff
+
 /*
  * Encode of VMX instructions base on Table 24-11 & 24-12 of SDM 3B
  */
-- 
1.7.1
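[The `((data | tmp) << 32) | tmp` idiom that this patch standardizes follows the VMX capability MSR encoding: the low 32 bits report the allowed-0 settings (every default-1 bit must read as 1 there), and the high 32 bits report the allowed-1 settings (the default-1 bits plus whatever features are exposed). A hedged sketch of that composition, with vmx_cap_msr as a hypothetical helper name:]

    #include <stdint.h>

    /* Build a VMX capability MSR value: low half = allowed-0 settings
     * (the default-1 bits), high half = allowed-1 settings (default-1
     * bits plus the exposed features). */
    static uint64_t vmx_cap_msr(uint32_t features, uint32_t default1)
    {
        return ((uint64_t)(features | default1) << 32) | default1;
    }

    /* e.g. for the pin-based controls:
     *   vmx_cap_msr(PIN_BASED_EXT_INTR_MASK | ..., 0x16)
     * where 0x16 is VMX_PINBASED_CTLS_DEFAULT1 from the patch. */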
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 03/11] nested vmx: expose bit 55 of IA32_VMX_BASIC_MSR to guest VMM
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 4d0f26b..a09fa97 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1299,7 +1299,7 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
  */
 int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
 {
-    u64 data = 0, tmp;
+    u64 data = 0, tmp = 0;
     int r = 1;
 
     if ( !nestedhvm_enabled(current->domain) )
@@ -1311,9 +1311,10 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
     switch (msr) {
     case MSR_IA32_VMX_BASIC:
         data = VVMCS_REVISION | ((u64)PAGE_SIZE) << 32 |
-               ((u64)MTRR_TYPE_WRBACK) << 50;
+               ((u64)MTRR_TYPE_WRBACK) << 50 | (1ULL << 55);
         break;
     case MSR_IA32_VMX_PINBASED_CTLS:
+    case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
         /* 1-seetings */
         data = PIN_BASED_EXT_INTR_MASK |
                PIN_BASED_NMI_EXITING |
@@ -1322,6 +1323,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         data = ((data | tmp) << 32) | (tmp);
         break;
     case MSR_IA32_VMX_PROCBASED_CTLS:
+    case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
         /* 1-seetings */
         data = CPU_BASED_HLT_EXITING |
                CPU_BASED_VIRTUAL_INTR_PENDING |
@@ -1353,6 +1355,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         data = (data << 32) | tmp;
         break;
     case MSR_IA32_VMX_EXIT_CTLS:
+    case MSR_IA32_VMX_TRUE_EXIT_CTLS:
         /* 1-seetings */
         tmp = VMX_EXIT_CTLS_DEFAULT1;
         data = VM_EXIT_ACK_INTR_ON_EXIT |
@@ -1367,6 +1370,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         data = ((data | tmp) << 32) | tmp;
         break;
     case MSR_IA32_VMX_ENTRY_CTLS:
+    case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
         /* 1-seetings */
         tmp = VMX_ENTRY_CTLS_DEFAULT1;
         data = VM_ENTRY_LOAD_GUEST_PAT |
-- 
1.7.1
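[For context: bit 55 of IA32_VMX_BASIC advertises the IA32_VMX_TRUE_*_CTLS MSRs (indices 0x48D-0x490), which additionally report which default-1 control bits may really be cleared; that is why this patch also makes the TRUE variants return the same values as the plain capability MSRs. A trivial sketch of the check a guest VMM would perform, with a hypothetical helper name:]

    #include <stdint.h>

    /* If IA32_VMX_BASIC bit 55 is set, the VMM should consult the TRUE
     * capability MSRs for the pin-based, processor-based, VM-exit and
     * VM-entry controls instead of the plain ones. */
    static int vmx_has_true_ctls(uint64_t ia32_vmx_basic)
    {
        return (int)((ia32_vmx_basic >> 55) & 1);
    }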
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 04/11] nested vmx: fix rflags status in virtual vmexit
As stated in the SDM, on VM exit all bits in RFLAGS are cleared except
bit 1, which is reserved and always reads as 1. Therefore we need to
follow this logic in virtual_vmexit.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index a09fa97..6e5c1d3 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -991,7 +991,8 @@ static void virtual_vmexit(struct cpu_user_regs *regs)
 
     regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
     regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
-    regs->eflags = __vmread(GUEST_RFLAGS);
+    /* VM exit clears all bits except bit 1 */
+    regs->eflags = 0x2;
 
     /* updating host cr0 to sync TS bit */
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
-- 
1.7.1
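[The resulting value can be stated concretely; a trivial self-contained check (sketch only) of what regs->eflags holds after this fix:]

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Per the SDM's VM-exit register state: RFLAGS is cleared,
         * leaving only the always-1 reserved bit 1. */
        uint64_t eflags_after_virtual_vmexit = 0x2;

        assert(eflags_after_virtual_vmexit == (1u << 1));
        return 0;
    }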
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 05/11] nested vmx: fix handling of RDTSC

If L0 is to handle the TSC access itself, it also needs to advance the
guest EIP by calling update_guest_eip().

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vmx.c        |    2 +-
 xen/arch/x86/hvm/vmx/vvmx.c       |    1 +
 xen/include/asm-x86/hvm/vmx/vmx.h |    2 ++
 3 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 3bb0d99..9fb9562 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1555,7 +1555,7 @@ static int get_instruction_length(void)
     return len;
 }
 
-static void update_guest_eip(void)
+void update_guest_eip(void)
 {
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     unsigned long x;
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 6e5c1d3..fd5bb92 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1614,6 +1614,7 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
             tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
             regs->eax = (uint32_t)tsc;
             regs->edx = (uint32_t)(tsc >> 32);
+            update_guest_eip();
 
             return 1;
         }
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index c4c2fe8..aa5b080 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -399,6 +399,8 @@ void ept_p2m_init(struct p2m_domain *p2m);
 void ept_walk_table(struct domain *d, unsigned long gfn);
 void setup_ept_dump(void);
 
+void update_guest_eip(void);
+
 /* EPT violation qualifications definitions */
 #define _EPT_READ_VIOLATION         0
 #define EPT_READ_VIOLATION          (1UL<<_EPT_READ_VIOLATION)
-- 
1.7.1
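[As background for the hunk above: when L0 emulates an L2 RDTSC, it takes L1's view of the TSC and applies the TSC_OFFSET that L1 programmed into the virtual VMCS for L2, then splits the result across EAX/EDX. A hedged sketch with a hypothetical helper name (the real code uses hvm_get_guest_tsc() and __get_vvmcs(..., TSC_OFFSET)):]

    #include <stdint.h>

    /* Sketch: compute what an L2 RDTSC should return when L0 handles it.
     * Unsigned addition wraps modulo 2^64, which matches how TSC offsets
     * (two's-complement values) are defined to behave. */
    static uint64_t l2_rdtsc(uint64_t l1_guest_tsc, uint64_t l2_tsc_offset,
                             uint32_t *eax, uint32_t *edx)
    {
        uint64_t tsc = l1_guest_tsc + l2_tsc_offset;

        *eax = (uint32_t)tsc;           /* RDTSC: low half in EAX  */
        *edx = (uint32_t)(tsc >> 32);   /*        high half in EDX */
        return tsc;
    }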
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 06/11] nested vmx: fix DR access VM exit

Debug registers are restored lazily when accessed. Therefore, when
receiving such a VM exit, L0 should be responsible for switching to the
right DR values first, and only then injecting the exit into the L1
hypervisor.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index fd5bb92..a5a8e3d 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1641,7 +1641,8 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
         break;
     case EXIT_REASON_DR_ACCESS:
         ctrl = __n2_exec_control(v);
-        if ( ctrl & CPU_BASED_MOV_DR_EXITING )
+        if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
+             v->arch.hvm_vcpu.flag_dr_dirty )
            nvcpu->nv_vmexit_pending = 1;
        break;
     case EXIT_REASON_INVLPG:
-- 
1.7.1
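[A hedged restatement of the routing decision above (illustrative names, not the Xen ones): the exit is forwarded to L1 only if L1 asked for MOV-DR exiting *and* L0's lazy restore has already put the guest's debug registers in place; otherwise L0 takes the exit and performs the lazy restore itself.]

    #include <stdint.h>

    #define CPU_BASED_MOV_DR_EXITING 0x00800000  /* bit 23, per the SDM */

    /* Sketch: should a MOV-DR exit from L2 be injected into L1? */
    static int dr_exit_goes_to_l1(uint32_t l1_exec_ctrl, int dr_dirty)
    {
        return (l1_exec_ctrl & CPU_BASED_MOV_DR_EXITING) && dr_dirty;
    }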
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 07/11] nested vmx: enable IA32E mode while do VM entry
Some VMMs may check the platform capability to judge whether long-mode
guests are supported, so we need to expose this bit to the guest VMM.
Xen on Xen works fine with the current solution because Xen doesn't check
this capability but directly sets the bit in the VMCS if the guest
supports long mode.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index a5a8e3d..e4ce466 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1376,7 +1376,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         tmp = VMX_ENTRY_CTLS_DEFAULT1;
         data = VM_ENTRY_LOAD_GUEST_PAT |
                VM_ENTRY_LOAD_GUEST_EFER |
-               VM_ENTRY_LOAD_PERF_GLOBAL_CTRL;
+               VM_ENTRY_LOAD_PERF_GLOBAL_CTRL |
+               VM_ENTRY_IA32E_MODE;
 
         data = ((data | tmp) << 32) | tmp;
         break;
-- 
1.7.1
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 08/11] nested vmx: enable "Virtualize APIC accesses" feature for L1 VMM
If the "Virtualize APIC accesses" feature is enabled, we need to sync the APIC-access address from virtual vvmcs into shadow vmcs when doing virtual_vmentry. Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com> --- xen/arch/x86/hvm/vmx/vvmx.c | 27 ++++++++++++++++++++++++++- 1 files changed, 26 insertions(+), 1 deletions(-) diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c index e4ce466..ae553bb 100644 --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -554,6 +554,24 @@ void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value) set_shadow_control(v, EXCEPTION_BITMAP, value); } +static void nvmx_update_apic_access_address(struct vcpu *v) +{ + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + u64 apic_gpfn, apic_mfn; + u32 ctrl; + void *apic_va; + + ctrl = __n2_secondary_exec_control(v); + if ( ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES ) + { + apic_gpfn = __get_vvmcs(nvcpu->nv_vvmcx, APIC_ACCESS_ADDR) >> PAGE_SHIFT; + apic_va = hvm_map_guest_frame_ro(apic_gpfn); + apic_mfn = virt_to_mfn(apic_va); + __vmwrite(APIC_ACCESS_ADDR, (apic_mfn << PAGE_SHIFT)); + hvm_unmap_guest_frame(apic_va); + } +} + static void __clear_current_vvmcs(struct vcpu *v) { struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); @@ -761,6 +779,7 @@ static void load_shadow_control(struct vcpu *v) nvmx_update_exit_control(v, vmx_vmexit_control); nvmx_update_entry_control(v); vmx_update_exception_bitmap(v); + nvmx_update_apic_access_address(v); } static void load_shadow_guest_state(struct vcpu *v) @@ -1350,7 +1369,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) break; case MSR_IA32_VMX_PROCBASED_CTLS2: /* 1-seetings */ - data = SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING; + data = SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING | + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; /* 0-settings */ tmp = 0; data = (data << 32) | tmp; @@ -1680,6 +1700,11 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, break; } + case EXIT_REASON_APIC_ACCESS: + ctrl = __n2_secondary_exec_control(v); + if ( ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES ) + nvcpu->nv_vmexit_pending = 1; + break; default: gdprintk(XENLOG_WARNING, "Unknown nested vmexit reason %x.\n", exit_reason); -- 1.7.1
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 09/11] nested vmx: enable PAUSE and RDPMC exiting for L1 VMM
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index ae553bb..fba09cc 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1362,6 +1362,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
                CPU_BASED_MONITOR_TRAP_FLAG |
                CPU_BASED_VIRTUAL_NMI_PENDING |
                CPU_BASED_ACTIVATE_MSR_BITMAP |
+               CPU_BASED_PAUSE_EXITING |
+               CPU_BASED_RDPMC_EXITING |
                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
         tmp = VMX_PROCBASED_CTLS_DEFAULT1;
         /* 0-settings */
-- 
1.7.1
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 10/11] nested vmx: fix interrupt delivery to L2 guest
When delivering an interrupt into the L2 guest, the L0 hypervisor needs to
check whether the L1 hypervisor wants to own the interrupt; if not, the
interrupt is injected directly into the L2 guest.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/intr.c |   11 +++++++----
 1 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
index 3961bc7..ef8b925 100644
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,7 +163,7 @@ enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
 
 static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
 {
-    u32 exit_ctrl;
+    u32 ctrl;
 
     if ( nvmx_intr_blocked(v) != hvm_intblk_none )
     {
@@ -176,11 +176,14 @@ static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
         if ( intack.source == hvm_intsrc_pic ||
              intack.source == hvm_intsrc_lapic )
         {
+            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+            if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
+                return 0;
+
             vmx_inject_extint(intack.vector);
 
-            exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
-                            VM_EXIT_CONTROLS);
-            if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+            ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
             {
                 /* for now, duplicate the ack path in vmx_intr_assist */
                 hvm_vcpu_ack_pending_irq(v, intack);
-- 
1.7.1
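[A hedged restatement of the routing rule this patch implements (hypothetical helper, not part of the Xen tree): an external interrupt arriving while L2 runs is owned by L1 only if L1 set "external-interrupt exiting" in its pin-based controls; otherwise it is injected straight into L2.]

    #include <stdint.h>

    #define PIN_BASED_EXT_INTR_MASK 0x00000001  /* bit 0, per the SDM */

    /* Sketch: does this external interrupt cause a vmexit to L1?
     * Return 0 -> inject directly into L2; nonzero -> route to L1
     * (additionally acking the irq now if L1 set
     * VM_EXIT_ACK_INTR_ON_EXIT in its VM-exit controls). */
    static int extint_goes_to_l1(uint32_t l1_pinbased_ctls)
    {
        return (l1_pinbased_ctls & PIN_BASED_EXT_INTR_MASK) != 0;
    }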
Dongxiao Xu
2012-Dec-05 13:02 UTC
[PATCH v2 11/11] nested vmx: check host ability when intercept MSR read
When the guest hypervisor reads a VMX capability MSR, we intercept the
access and return an emulated value. Besides that, we also need to ensure
that the emulated value is compatible with the host's actual ability.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 xen/arch/x86/hvm/vmx/vvmx.c |   18 ++++++++++++++----
 1 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index fba09cc..e65f963 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1319,19 +1319,20 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
  */
 int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
 {
-    u64 data = 0, tmp = 0;
+    u64 data = 0, host_data = 0, tmp = 0;
     int r = 1;
 
     if ( !nestedhvm_enabled(current->domain) )
         return 0;
 
+    rdmsrl(msr, host_data);
+
     /*
      * Remove unsupport features from n1 guest capability MSR
      */
     switch (msr) {
     case MSR_IA32_VMX_BASIC:
-        data = VVMCS_REVISION | ((u64)PAGE_SIZE) << 32 |
-               ((u64)MTRR_TYPE_WRBACK) << 50 | (1ULL << 55);
+        data = (host_data & (~0ul << 32)) | VVMCS_REVISION;
         break;
     case MSR_IA32_VMX_PINBASED_CTLS:
     case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
@@ -1341,6 +1342,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
                PIN_BASED_PREEMPT_TIMER;
         tmp = VMX_PINBASED_CTLS_DEFAULT1;
         data = ((data | tmp) << 32) | (tmp);
+        data = ((data & host_data) & (~0ul << 32)) |
+               ((data | host_data) & (~0u));
         break;
     case MSR_IA32_VMX_PROCBASED_CTLS:
     case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
@@ -1368,6 +1371,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         tmp = VMX_PROCBASED_CTLS_DEFAULT1;
         /* 0-settings */
         data = ((data | tmp) << 32) | (tmp);
+        data = ((data & host_data) & (~0ul << 32)) |
+               ((data | host_data) & (~0u));
         break;
     case MSR_IA32_VMX_PROCBASED_CTLS2:
         /* 1-seetings */
@@ -1376,6 +1381,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
         /* 0-settings */
         tmp = 0;
         data = (data << 32) | tmp;
+        data = ((data & host_data) & (~0ul << 32)) |
+               ((data | host_data) & (~0u));
         break;
     case MSR_IA32_VMX_EXIT_CTLS:
     case MSR_IA32_VMX_TRUE_EXIT_CTLS:
@@ -1391,6 +1398,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
                VM_EXIT_LOAD_PERF_GLOBAL_CTRL;
         /* 0-settings */
         data = ((data | tmp) << 32) | tmp;
+        data = ((data & host_data) & (~0ul << 32)) |
+               ((data | host_data) & (~0u));
         break;
     case MSR_IA32_VMX_ENTRY_CTLS:
     case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
@@ -1401,8 +1410,9 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
                VM_ENTRY_LOAD_PERF_GLOBAL_CTRL |
                VM_ENTRY_IA32E_MODE;
         data = ((data | tmp) << 32) | tmp;
+        data = ((data & host_data) & (~0ul << 32)) |
+               ((data | host_data) & (~0u));
         break;
-
     case IA32_FEATURE_CONTROL_MSR:
         data = IA32_FEATURE_CONTROL_MSR_LOCK |
                IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON_OUTSIDE_SMX;
-- 
1.7.1
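[The masking expression this patch adds after each case is worth spelling out: the high 32 bits (allowed-1 settings) are intersected with the host value, so no feature the host lacks is ever advertised; the low 32 bits (allowed-0 settings) are unioned with the host value, so any bit the host forces to 1 stays forced to 1 for the guest too. A hedged sketch with a hypothetical helper name:]

    #include <stdint.h>

    /* Clamp an emulated VMX capability MSR against the host's real value:
     * high half = emulated AND host (never offer unsupported features),
     * low half  = emulated OR host (keep every host-mandated 1 bit). */
    static uint64_t clamp_to_host(uint64_t emulated, uint64_t host)
    {
        uint64_t high = (emulated & host) & (~0ull << 32);
        uint64_t low  = (emulated | host) & 0xffffffffull;
        return high | low;
    }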
Jan Beulich
2012-Dec-05 16:31 UTC
Re: [PATCH v2 03/11] nested vmx: expose bit 55 of IA32_VMX_BASIC_MSR to guest VMM
>>> On 05.12.12 at 14:02, Dongxiao Xu <dongxiao.xu@intel.com> wrote: > Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com> > --- > xen/arch/x86/hvm/vmx/vvmx.c | 8 ++++++-- > 1 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c > index 4d0f26b..a09fa97 100644 > --- a/xen/arch/x86/hvm/vmx/vvmx.c > +++ b/xen/arch/x86/hvm/vmx/vvmx.c > @@ -1299,7 +1299,7 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs) > */ > int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) > { > - u64 data = 0, tmp; > + u64 data = 0, tmp = 0; > int r = 1; > > if ( !nestedhvm_enabled(current->domain) ) > @@ -1311,9 +1311,10 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 > *msr_content) > switch (msr) { > case MSR_IA32_VMX_BASIC: > data = VVMCS_REVISION | ((u64)PAGE_SIZE) << 32 | > - ((u64)MTRR_TYPE_WRBACK) << 50; > + ((u64)MTRR_TYPE_WRBACK) << 50 | (1ULL << 55);There''s still this literal use of 55 here. Jan> break; > case MSR_IA32_VMX_PINBASED_CTLS: > + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: > /* 1-seetings */ > data = PIN_BASED_EXT_INTR_MASK | > PIN_BASED_NMI_EXITING | > @@ -1322,6 +1323,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 > *msr_content) > data = ((data | tmp) << 32) | (tmp); > break; > case MSR_IA32_VMX_PROCBASED_CTLS: > + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: > /* 1-seetings */ > data = CPU_BASED_HLT_EXITING | > CPU_BASED_VIRTUAL_INTR_PENDING | > @@ -1353,6 +1355,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 > *msr_content) > data = (data << 32) | tmp; > break; > case MSR_IA32_VMX_EXIT_CTLS: > + case MSR_IA32_VMX_TRUE_EXIT_CTLS: > /* 1-seetings */ > tmp = VMX_EXIT_CTLS_DEFAULT1; > data = VM_EXIT_ACK_INTR_ON_EXIT | > @@ -1367,6 +1370,7 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 > *msr_content) > data = ((data | tmp) << 32) | tmp; > break; > case MSR_IA32_VMX_ENTRY_CTLS: > + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: > /* 1-seetings */ > tmp = VMX_ENTRY_CTLS_DEFAULT1; > data = VM_ENTRY_LOAD_GUEST_PAT | > -- > 1.7.1 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Jan Beulich
2012-Dec-05 16:33 UTC
Re: [PATCH v2 00/11] nested vmx: bug fixes and feature enabling
>>> On 05.12.12 at 14:02, Dongxiao Xu <dongxiao.xu@intel.com> wrote: > This series of patches contain some bug fixes and feature enabling for > nested vmx, please help to review and pull. > > For the following patches, it doesn''t have influence about Xen on Xen > functionality. > No special need to backport to 4.2.x (My own opinion).Xen on Xen isn''t the only thing being cared about afaik.> nested vmx: emulate MSR bitmaps > nested vmx: use literal name instead of hard numbers > nested vmx: expose bit 55 of IA32_VMX_BASIC_MSR to guest VMM > nested vmx: fix rflags status in virtual vmexit > nested vmx: fix handling of RDTSC > nested vmx: fix DR access VM exit3 bug fixes above that I would want to know whether they are relevant for 4.2.> nested vmx: enable IA32E mode while do VM entry > nested vmx: enable "Virtualize APIC accesses" feature for L1 VMM > nested vmx: enable PAUSE and RDPMC exiting for L1 VMM > nested vmx: fix interrupt delivery to L2 guestAnd one more here. Jan> nested vmx: check host ability when intercept MSR read