Virtual interrupt delivery removes the need for Xen to inject vAPIC interrupts manually; injection is fully taken care of by the hardware. This requires some special handling in the existing interrupt injection path: For a pending interrupt from the vLAPIC, instead of injecting it directly, we may need to update architecture-specific indicators before resuming the guest. Before returning to the guest, RVI should be updated if there are any pending IRRs. The EOI exit bitmap controls whether an EOI write should cause a VM-Exit. If set, a trap-like EOI-induced VM-Exit is triggered. The approach here is to manipulate the EOI exit bitmap based on the value of TMR. A level-triggered irq requires a hook in the vLAPIC EOI write, so that the vIOAPIC EOI is triggered and emulated. Signed-off-by: Gang Wei <gang.wei@intel.com> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com> Signed-off-by: Jiongxi Li <jiongxi.li@intel.com> diff -r 7c6844dd4a0d -r e3724bdf9403 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/arch/x86/hvm/vlapic.c Thu Sep 13 12:22:54 2012 +0800 @@ -145,6 +145,9 @@ int vlapic_set_irq(struct vlapic *vlapic if ( trig ) vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]); + if ( hvm_funcs.update_eoi_exit_bitmap ) + hvm_funcs.update_eoi_exit_bitmap(vlapic_vcpu(vlapic), vec ,trig); + /* We may need to wake up target vcpu, besides set pending bit here */ return !vlapic_test_and_set_irr(vec, vlapic); } @@ -410,6 +413,14 @@ void vlapic_EOI_set(struct vlapic *vlapi hvm_dpci_msi_eoi(current->domain, vector); } +void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector) +{ + if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) ) + vioapic_update_EOI(vlapic_domain(vlapic), vector); + + hvm_dpci_msi_eoi(current->domain, vector); +} + int vlapic_ipi( struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high) { @@ -1000,6 +1011,14 @@ void vlapic_adjust_i8259_target(struct d pt_adjust_global_vcpu_target(v); } +int vlapic_virtual_intr_delivery_enabled(void) +{ + if ( 
hvm_funcs.virtual_intr_delivery_enabled ) + return hvm_funcs.virtual_intr_delivery_enabled(); + else + return 0; +} + int vlapic_has_pending_irq(struct vcpu *v) { struct vlapic *vlapic = vcpu_vlapic(v); @@ -1012,6 +1031,9 @@ int vlapic_has_pending_irq(struct vcpu * if ( irr == -1 ) return -1; + if ( vlapic_virtual_intr_delivery_enabled() ) + return irr; + isr = vlapic_find_highest_isr(vlapic); isr = (isr != -1) ? isr : 0; if ( (isr & 0xf0) >= (irr & 0xf0) ) @@ -1024,6 +1046,9 @@ int vlapic_ack_pending_irq(struct vcpu * { struct vlapic *vlapic = vcpu_vlapic(v); + if ( vlapic_virtual_intr_delivery_enabled() ) + return 1; + vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); vlapic_clear_irr(vector, vlapic); diff -r 7c6844dd4a0d -r e3724bdf9403 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/arch/x86/hvm/vmx/intr.c Thu Sep 13 12:22:54 2012 +0800 @@ -206,6 +206,7 @@ void vmx_intr_assist(void) struct vcpu *v = current; unsigned int tpr_threshold = 0; enum hvm_intblk intblk; + int pt_vector = -1; /* Block event injection when single step with MTF. */ if ( unlikely(v->arch.hvm_vcpu.single_step) ) @@ -216,7 +217,7 @@ void vmx_intr_assist(void) } /* Crank the handle on interrupt state. 
*/ - pt_update_irq(v); + pt_vector = pt_update_irq(v); do { intack = hvm_vcpu_has_pending_irq(v); @@ -227,19 +228,43 @@ void vmx_intr_assist(void) goto out; intblk = hvm_interrupt_blocked(v, intack); - if ( intblk == hvm_intblk_tpr ) + if ( cpu_has_vmx_virtual_intr_delivery ) { - ASSERT(vlapic_enabled(vcpu_vlapic(v))); - ASSERT(intack.source == hvm_intsrc_lapic); - tpr_threshold = intack.vector >> 4; - goto out; + /* Set "Interrupt-window exiting" for ExtINT */ + if ( (intblk != hvm_intblk_none) && + ( (intack.source == hvm_intsrc_pic) || + ( intack.source == hvm_intsrc_vector) ) ) + { + enable_intr_window(v, intack); + goto out; + } + + if ( __vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK ) + { + if ( (intack.source == hvm_intsrc_pic) || + (intack.source == hvm_intsrc_nmi) || + (intack.source == hvm_intsrc_mce) ) + enable_intr_window(v, intack); + + goto out; + } } + else + { + if ( intblk == hvm_intblk_tpr ) + { + ASSERT(vlapic_enabled(vcpu_vlapic(v))); + ASSERT(intack.source == hvm_intsrc_lapic); + tpr_threshold = intack.vector >> 4; + goto out; + } - if ( (intblk != hvm_intblk_none) || - (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) ) - { - enable_intr_window(v, intack); - goto out; + if ( (intblk != hvm_intblk_none) || + (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) ) + { + enable_intr_window(v, intack); + goto out; + } } intack = hvm_vcpu_ack_pending_irq(v, intack); @@ -253,6 +278,44 @@ void vmx_intr_assist(void) { hvm_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE); } + else if ( cpu_has_vmx_virtual_intr_delivery && + intack.source != hvm_intsrc_pic && + intack.source != hvm_intsrc_vector ) + { + unsigned long status = __vmread(GUEST_INTR_STATUS); + + /* + * Set eoi_exit_bitmap for periodic timer interrupt to cause EOI-induced VM + * exit, then pending periodic timer interrupts have the chance to be injected + * for compensation + */ + if (pt_vector != -1) + vmx_set_eoi_exit_bitmap(v, pt_vector); + + /* we need update the 
RVI field */ + status &= ~(unsigned long)0x0FF; + status |= (unsigned long)0x0FF & + intack.vector; + __vmwrite(GUEST_INTR_STATUS, status); + if (v->arch.hvm_vmx.eoi_exitmap_changed) { +#ifdef __i386__ +#define UPDATE_EOI_EXITMAP(v, e) { \ + if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) { \ + __vmwrite(EOI_EXIT_BITMAP##e, v->arch.hvm_vmx.eoi_exit_bitmap[e]); \ + __vmwrite(EOI_EXIT_BITMAP##e##_HIGH, v.arch.hvm_vmx.eoi_exit_bitmap[e] >> 32);}} +#else +#define UPDATE_EOI_EXITMAP(v, e) { \ + if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) { \ + __vmwrite(EOI_EXIT_BITMAP##e, v->arch.hvm_vmx.eoi_exit_bitmap[e]);}} +#endif + UPDATE_EOI_EXITMAP(v, 0); + UPDATE_EOI_EXITMAP(v, 1); + UPDATE_EOI_EXITMAP(v, 2); + UPDATE_EOI_EXITMAP(v, 3); + } + + pt_intr_post(v, intack); + } else { HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0); @@ -262,11 +325,16 @@ void vmx_intr_assist(void) /* Is there another IRQ to queue up behind this one? */ intack = hvm_vcpu_has_pending_irq(v); - if ( unlikely(intack.source != hvm_intsrc_none) ) - enable_intr_window(v, intack); + if ( !cpu_has_vmx_virtual_intr_delivery || + intack.source == hvm_intsrc_pic || + intack.source == hvm_intsrc_vector ) + { + if ( unlikely(intack.source != hvm_intsrc_none) ) + enable_intr_window(v, intack); + } out: - if ( cpu_has_vmx_tpr_shadow ) + if ( !cpu_has_vmx_virtual_intr_delivery && cpu_has_vmx_tpr_shadow ) __vmwrite(TPR_THRESHOLD, tpr_threshold); } diff -r 7c6844dd4a0d -r e3724bdf9403 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Sep 13 12:22:54 2012 +0800 @@ -90,6 +90,7 @@ static void __init vmx_display_features( P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap"); P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest"); P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization"); + P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery"); #undef P if ( !printed ) @@ -188,11 
+189,12 @@ static int vmx_init_vmcs_config(void) opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST; /* - * "APIC Register Virtualization" + * "APIC Register Virtualization" and "Virtual Interrupt Delivery" * can be set only when "use TPR shadow" is set */ if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW ) - opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT; + opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; _vmx_secondary_exec_control = adjust_vmx_controls( @@ -787,6 +789,22 @@ static int construct_vmcs(struct vcpu *v __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0)); __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE)); + if ( cpu_has_vmx_virtual_intr_delivery ) + { + /* EOI-exit bitmap */ + v->arch.hvm_vmx.eoi_exit_bitmap[0] = (uint64_t)0; + __vmwrite(EOI_EXIT_BITMAP0, v->arch.hvm_vmx.eoi_exit_bitmap[0]); + v->arch.hvm_vmx.eoi_exit_bitmap[1] = (uint64_t)0; + __vmwrite(EOI_EXIT_BITMAP1, v->arch.hvm_vmx.eoi_exit_bitmap[1]); + v->arch.hvm_vmx.eoi_exit_bitmap[2] = (uint64_t)0; + __vmwrite(EOI_EXIT_BITMAP2, v->arch.hvm_vmx.eoi_exit_bitmap[2]); + v->arch.hvm_vmx.eoi_exit_bitmap[3] = (uint64_t)0; + __vmwrite(EOI_EXIT_BITMAP3, v->arch.hvm_vmx.eoi_exit_bitmap[3]); + + /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */ + __vmwrite(GUEST_INTR_STATUS, 0); + } + /* Host data selectors. 
*/ __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS); __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS); @@ -1028,6 +1046,30 @@ int vmx_add_host_load_msr(u32 msr) return 0; } +void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector) +{ + int index, offset, changed; + + index = vector >> 6; + offset = vector & 63; + changed = !test_and_set_bit(offset, + (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]); + if (changed) + set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed); +} + +void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector) +{ + int index, offset, changed; + + index = vector >> 6; + offset = vector & 63; + changed = test_and_clear_bit(offset, + (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]); + if (changed) + set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed); +} + int vmx_create_vmcs(struct vcpu *v) { struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; diff -r 7c6844dd4a0d -r e3724bdf9403 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Sep 13 12:22:54 2012 +0800 @@ -1502,6 +1502,22 @@ static void vmx_set_info_guest(struct vc vmx_vmcs_exit(v); } +static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector, u8 trig) +{ + if ( cpu_has_vmx_virtual_intr_delivery ) + { + if (trig) + vmx_set_eoi_exit_bitmap(v, vector); + else + vmx_clear_eoi_exit_bitmap(v, vector); + } +} + +static int vmx_virtual_intr_delivery_enabled(void) +{ + return cpu_has_vmx_virtual_intr_delivery; +} + static struct hvm_function_table __read_mostly vmx_function_table = { .name = "VMX", .cpu_up_prepare = vmx_cpu_up_prepare, @@ -1548,7 +1564,9 @@ static struct hvm_function_table __read_ .nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception, .nhvm_vcpu_vmexit_trap = nvmx_vmexit_trap, .nhvm_intr_blocked = nvmx_intr_blocked, - .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources + .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources, + .update_eoi_exit_bitmap = 
vmx_update_eoi_exit_bitmap, + .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled }; struct hvm_function_table * __init start_vmx(void) @@ -2284,6 +2302,17 @@ static int vmx_handle_apic_write(void) return vlapic_apicv_write(current, offset); } +/* + * When "Virtual Interrupt Delivery" is enabled, this function is used + * to handle EOI-induced VM exit + */ +void vmx_handle_EOI_induced_exit(struct vlapic *vlapic, int vector) +{ + ASSERT(cpu_has_vmx_virtual_intr_delivery); + + vlapic_handle_EOI_induced_exit(vlapic, vector); +} + void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0; @@ -2677,6 +2706,16 @@ void vmx_vmexit_handler(struct cpu_user_ hvm_inject_hw_exception(TRAP_gp_fault, 0); break; + case EXIT_REASON_EOI_INDUCED: + { + int vector; + exit_qualification = __vmread(EXIT_QUALIFICATION); + vector = exit_qualification & 0xff; + + vmx_handle_EOI_induced_exit(vcpu_vlapic(current), vector); + break; + } + case EXIT_REASON_IO_INSTRUCTION: exit_qualification = __vmread(EXIT_QUALIFICATION); if ( exit_qualification & 0x10 ) diff -r 7c6844dd4a0d -r e3724bdf9403 xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/arch/x86/hvm/vpt.c Thu Sep 13 12:22:54 2012 +0800 @@ -212,7 +212,7 @@ static void pt_timer_fn(void *data) pt_unlock(pt); } -void pt_update_irq(struct vcpu *v) +int pt_update_irq(struct vcpu *v) { struct list_head *head = &v->arch.hvm_vcpu.tm_list; struct periodic_time *pt, *temp, *earliest_pt = NULL; @@ -245,7 +245,7 @@ void pt_update_irq(struct vcpu *v) if ( earliest_pt == NULL ) { spin_unlock(&v->arch.hvm_vcpu.tm_lock); - return; + return -1; } earliest_pt->irq_issued = 1; @@ -263,6 +263,17 @@ void pt_update_irq(struct vcpu *v) hvm_isa_irq_deassert(v->domain, irq); hvm_isa_irq_assert(v->domain, irq); } + + /* + * If periodic timer interrupt is handled by lapic, its vector in + * IRR is returned and used to set eoi_exit_bitmap for virtual + * 
interrupt delivery case. Otherwise return -1 to do nothing. + */ + if ( vlapic_accept_pic_intr(v) && + (&v->domain->arch.hvm_domain)->vpic[0].int_output ) + return -1; + else + return pt_irq_vector(earliest_pt, hvm_intsrc_lapic); } static struct periodic_time *is_pt_irq( diff -r 7c6844dd4a0d -r e3724bdf9403 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Sep 13 12:22:54 2012 +0800 @@ -180,6 +180,10 @@ struct hvm_function_table { enum hvm_intblk (*nhvm_intr_blocked)(struct vcpu *v); void (*nhvm_domain_relinquish_resources)(struct domain *d); + + /* Virtual interrupt delivery */ + void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig); + int (*virtual_intr_delivery_enabled)(void); }; extern struct hvm_function_table hvm_funcs; diff -r 7c6844dd4a0d -r e3724bdf9403 xen/include/asm-x86/hvm/vlapic.h --- a/xen/include/asm-x86/hvm/vlapic.h Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/include/asm-x86/hvm/vlapic.h Thu Sep 13 12:22:54 2012 +0800 @@ -100,6 +100,7 @@ int vlapic_accept_pic_intr(struct vcpu * void vlapic_adjust_i8259_target(struct domain *d); void vlapic_EOI_set(struct vlapic *vlapic); +void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector); int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high); diff -r 7c6844dd4a0d -r e3724bdf9403 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Sep 13 12:22:54 2012 +0800 @@ -110,6 +110,9 @@ struct arch_vmx_struct { unsigned int host_msr_count; struct vmx_msr_entry *host_msr_area; + uint32_t eoi_exitmap_changed; + uint64_t eoi_exit_bitmap[4]; + unsigned long host_cr0; /* Is the guest in real mode? 
*/ @@ -183,6 +186,7 @@ extern u32 vmx_vmentry_control; #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 extern u32 vmx_secondary_exec_control; @@ -233,6 +237,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) #define cpu_has_vmx_apic_reg_virt \ (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT) +#define cpu_has_vmx_virtual_intr_delivery \ + (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define VMX_INTR_SHADOW_STI 0x00000001 @@ -251,6 +257,7 @@ enum vmcs_field { GUEST_GS_SELECTOR = 0x0000080a, GUEST_LDTR_SELECTOR = 0x0000080c, GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, HOST_ES_SELECTOR = 0x00000c00, HOST_CS_SELECTOR = 0x00000c02, HOST_SS_SELECTOR = 0x00000c04, @@ -278,6 +285,14 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -398,6 +413,8 @@ int vmx_write_guest_msr(u32 msr, u64 val int vmx_add_guest_msr(u32 msr); int vmx_add_host_load_msr(u32 msr); void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to); +void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector); +void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector); #endif /* ASM_X86_HVM_VMX_VMCS_H__ */ diff -r 7c6844dd4a0d 
-r e3724bdf9403 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Thu Sep 13 12:22:54 2012 +0800 @@ -119,6 +119,7 @@ void vmx_update_cpu_exec_control(struct #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46 #define EXIT_REASON_ACCESS_LDTR_OR_TR 47 #define EXIT_REASON_EPT_VIOLATION 48 diff -r 7c6844dd4a0d -r e3724bdf9403 xen/include/asm-x86/hvm/vpt.h --- a/xen/include/asm-x86/hvm/vpt.h Tue Sep 11 15:34:36 2012 +0800 +++ b/xen/include/asm-x86/hvm/vpt.h Thu Sep 13 12:22:54 2012 +0800 @@ -141,7 +141,7 @@ struct pl_time { /* platform time */ void pt_save_timer(struct vcpu *v); void pt_restore_timer(struct vcpu *v); -void pt_update_irq(struct vcpu *v); +int pt_update_irq(struct vcpu *v); void pt_intr_post(struct vcpu *v, struct hvm_intack intack); void pt_migrate(struct vcpu *v);