Jan Beulich
2007-Nov-22 16:59 UTC
[Xen-devel] [PATCH] x86: emulate I/O port access breakpoints
Emulate the trapping on I/O port accesses when emulating IN/OUT. Also allow 8-byte breakpoints on x86-64 (and on i686 if the hardware supports them), and tighten the condition for loading debug registers during context switch. This patch depends on the prior single step injection patch. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: 2007-11-13/xen/arch/x86/domain.c ==================================================================--- 2007-11-13.orig/xen/arch/x86/domain.c 2007-11-12 08:47:42.000000000 +0100 +++ 2007-11-13/xen/arch/x86/domain.c 2007-11-21 09:44:48.000000000 +0100 @@ -42,6 +42,7 @@ #include <asm/hypercall.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> +#include <asm/debugreg.h> #include <asm/msr.h> #include <asm/nmi.h> #include <asm/iommu.h> @@ -1219,7 +1220,7 @@ static void paravirt_ctxt_switch_from(st * inside Xen, before we get a chance to reload DR7, and this cannot always * safely be handled. */ - if ( unlikely(v->arch.guest_context.debugreg[7]) ) + if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) write_debugreg(7, 0); } @@ -1234,7 +1235,7 @@ static void paravirt_ctxt_switch_to(stru if ( unlikely(cr4 != read_cr4()) ) write_cr4(cr4); - if ( unlikely(v->arch.guest_context.debugreg[7]) ) + if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) { write_debugreg(0, v->arch.guest_context.debugreg[0]); write_debugreg(1, v->arch.guest_context.debugreg[1]); Index: 2007-11-13/xen/arch/x86/domctl.c ==================================================================--- 2007-11-13.orig/xen/arch/x86/domctl.c 2007-11-12 08:47:42.000000000 +0100 +++ 2007-11-13/xen/arch/x86/domctl.c 2007-11-21 12:37:04.000000000 +0100 @@ -825,12 +825,18 @@ void arch_get_info_guest(struct vcpu *v, c.nat->ctrlreg[1] = xen_pfn_to_cr3( pagetable_get_pfn(v->arch.guest_table_user)); #endif + + c.nat->debugreg[7] |= c.nat->debugreg[5]; + c.nat->debugreg[5] = 0; } #ifdef CONFIG_COMPAT else { l4_pgentry_t *l4e = 
__va(pagetable_get_paddr(v->arch.guest_table)); c.cmp->ctrlreg[3] = compat_pfn_to_cr3(l4e_get_pfn(*l4e)); + + c.cmp->debugreg[7] |= c.cmp->debugreg[5]; + c.cmp->debugreg[5] = 0; } #endif Index: 2007-11-13/xen/arch/x86/hvm/svm/svm.c ==================================================================--- 2007-11-13.orig/xen/arch/x86/hvm/svm/svm.c 2007-11-20 16:46:55.000000000 +0100 +++ 2007-11-13/xen/arch/x86/hvm/svm/svm.c 2007-11-21 09:47:01.000000000 +0100 @@ -34,6 +34,7 @@ #include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/types.h> +#include <asm/debugreg.h> #include <asm/msr.h> #include <asm/spinlock.h> #include <asm/hvm/hvm.h> @@ -189,8 +190,6 @@ static void __restore_debug_registers(st * if one of the breakpoints is enabled. So mask out all bits that don't * enable some breakpoint functionality. */ -#define DR7_ACTIVE_MASK 0xff - static void svm_restore_dr(struct vcpu *v) { if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) Index: 2007-11-13/xen/arch/x86/hvm/vmx/vmx.c ==================================================================--- 2007-11-13.orig/xen/arch/x86/hvm/vmx/vmx.c 2007-11-20 16:32:57.000000000 +0100 +++ 2007-11-13/xen/arch/x86/hvm/vmx/vmx.c 2007-11-21 09:47:07.000000000 +0100 @@ -33,6 +33,7 @@ #include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/types.h> +#include <asm/debugreg.h> #include <asm/msr.h> #include <asm/spinlock.h> #include <asm/paging.h> @@ -434,8 +435,6 @@ static void __restore_debug_registers(st * if one of the breakpoints is enabled. So mask out all bits that don't * enable some breakpoint functionality. */ -#define DR7_ACTIVE_MASK 0xff - static void vmx_restore_dr(struct vcpu *v) { /* NB. __vmread() is not usable here, so we cannot read from the VMCS. 
*/ Index: 2007-11-13/xen/arch/x86/traps.c ==================================================================--- 2007-11-13.orig/xen/arch/x86/traps.c 2007-11-20 15:46:19.000000000 +0100 +++ 2007-11-13/xen/arch/x86/traps.c 2007-11-21 11:29:05.000000000 +0100 @@ -412,17 +412,51 @@ static int do_guest_trap( return 0; } -static void instruction_done(struct cpu_user_regs *regs, unsigned long eip) +static void instruction_done(struct cpu_user_regs *regs, + unsigned long eip, unsigned int bpmatch) { regs->eip = eip; regs->eflags &= ~X86_EFLAGS_RF; - if ( regs->eflags & X86_EFLAGS_TF ) + if ( bpmatch || (regs->eflags & X86_EFLAGS_TF) ) { - current->arch.guest_context.debugreg[6] |= 0xffff4ff0; + current->arch.guest_context.debugreg[6] |= bpmatch | 0xffff0ff0; + if ( regs->eflags & X86_EFLAGS_TF ) + current->arch.guest_context.debugreg[6] |= 0x4000; do_guest_trap(TRAP_debug, regs, 0); } } +static unsigned int check_guest_io_breakpoint(struct vcpu *v, + unsigned int port, unsigned int len) +{ + unsigned int match = 0; + + if ( unlikely(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) ) + { + unsigned int i; + + for ( i = 0; i < 4; ++i ) + if ( v->arch.guest_context.debugreg[5] & + (3 << (i * DR_ENABLE_SIZE)) ) + { + unsigned long start = v->arch.guest_context.debugreg[i]; + unsigned int width = 0; + + switch ( (v->arch.guest_context.debugreg[7] >> + (DR_CONTROL_SHIFT + i * DR_CONTROL_SIZE)) & 0xc ) + { + case DR_LEN_1: width = 1; break; + case DR_LEN_2: width = 2; break; + case DR_LEN_4: width = 4; break; + case DR_LEN_8: width = 8; break; + } + if ( start < port + len && start + width > port ) + match |= 1 << i; + } + } + return match; +} + /* * Called from asm to set up the NMI trapbounce info. * Returns 0 if no callback is set up, else 1. @@ -639,7 +673,6 @@ static int emulate_forced_invalid_op(str { /* Modify Feature Information. 
*/ clear_bit(X86_FEATURE_VME, &d); - clear_bit(X86_FEATURE_DE, &d); clear_bit(X86_FEATURE_PSE, &d); clear_bit(X86_FEATURE_PGE, &d); if ( !cpu_has_sep ) @@ -668,7 +701,7 @@ static int emulate_forced_invalid_op(str regs->ebx = b; regs->ecx = c; regs->edx = d; - instruction_done(regs, eip); + instruction_done(regs, eip, 0); trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip); @@ -1325,7 +1358,7 @@ static int emulate_privileged_op(struct unsigned long *reg, eip = regs->eip, res; u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0; enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none; - unsigned int port, i, data_sel, ar, data, rc; + unsigned int port, i, data_sel, ar, data, rc, bpmatch = 0; unsigned int op_bytes, op_default, ad_bytes, ad_default; #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \ ? regs->reg \ @@ -1475,6 +1508,8 @@ static int emulate_privileged_op(struct } #endif + port = (u16)regs->edx; + continue_io_string: switch ( opcode ) { @@ -1483,9 +1518,8 @@ static int emulate_privileged_op(struct case 0x6d: /* INSW/INSL */ if ( data_limit < op_bytes - 1 || rd_ad(edi) > data_limit - (op_bytes - 1) || - !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) + !guest_io_okay(port, op_bytes, v, regs) ) goto fail; - port = (u16)regs->edx; switch ( op_bytes ) { case 1: @@ -1515,7 +1549,7 @@ static int emulate_privileged_op(struct case 0x6f: /* OUTSW/OUTSL */ if ( data_limit < op_bytes - 1 || rd_ad(esi) > data_limit - (op_bytes - 1) || - !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) + !guest_io_okay(port, op_bytes, v, regs) ) goto fail; rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes); if ( rc != 0 ) @@ -1523,7 +1557,6 @@ static int emulate_privileged_op(struct propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0); return EXCRET_fault_fixed; } - port = (u16)regs->edx; switch ( op_bytes ) { case 1: @@ -1549,9 +1582,11 @@ static int emulate_privileged_op(struct break; } + bpmatch = 
check_guest_io_breakpoint(v, port, op_bytes); + if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) ) { - if ( !hypercall_preempt_check() ) + if ( !bpmatch && !hypercall_preempt_check() ) goto continue_io_string; eip = regs->eip; } @@ -1630,6 +1665,7 @@ static int emulate_privileged_op(struct regs->eax = (u32)~0; break; } + bpmatch = check_guest_io_breakpoint(v, port, op_bytes); goto done; case 0xec: /* IN %dx,%al */ @@ -1667,6 +1703,7 @@ static int emulate_privileged_op(struct io_emul(regs); break; } + bpmatch = check_guest_io_breakpoint(v, port, op_bytes); goto done; case 0xee: /* OUT %al,%dx */ @@ -1960,7 +1997,7 @@ static int emulate_privileged_op(struct #undef rd_ad done: - instruction_done(regs, eip); + instruction_done(regs, eip, bpmatch); return EXCRET_fault_fixed; fail: @@ -2330,7 +2367,7 @@ static int emulate_gate_op(struct cpu_u sel |= (regs->cs & 3); regs->cs = sel; - instruction_done(regs, off); + instruction_done(regs, off, 0); #endif return 0; @@ -2842,25 +2879,44 @@ long set_debugreg(struct vcpu *v, int re /* * DR7: Bit 10 reserved (set to 1). * Bits 11-12,14-15 reserved (set to 0). + */ + value &= ~DR_CONTROL_RESERVED_ZERO; /* reserved bits => 0 */ + value |= DR_CONTROL_RESERVED_ONE; /* reserved bits => 1 */ + /* * Privileged bits: * GD (bit 13): must be 0. - * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10. - * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10. */ - /* DR7 == 0 => debugging disabled for this domain. */ - if ( value != 0 ) + if ( value & DR_GENERAL_DETECT ) + return -EPERM; + /* DR7.{G,L}E = 0 => debugging disabled for this domain. 
*/ + if ( value & DR7_ACTIVE_MASK ) { - value &= 0xffff27ff; /* reserved bits => 0 */ - value |= 0x00000400; /* reserved bits => 1 */ - if ( (value & (1<<13)) != 0 ) return -EPERM; - for ( i = 0; i < 16; i += 2 ) - if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM; + unsigned int io_enable = 0; + + for ( i = DR_CONTROL_SHIFT; i < 32; i += DR_CONTROL_SIZE ) + { + if ( ((value >> i) & 3) == DR_IO ) + { + if ( !(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) ) + return -EPERM; + io_enable |= value & (3 << ((i - 16) >> 1)); + } +#ifdef __i386__ + if ( (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + !boot_cpu_has(X86_FEATURE_LM)) && + ((value >> i) & 0xc) == DR_LEN_8 ) + return -EPERM; +#endif + } + v->arch.guest_context.debugreg[5] = io_enable; + value &= ~io_enable; /* * If DR7 was previously clear then we need to load all other * debug registers at this point as they were not restored during * context switch. */ - if ( (v == curr) && (v->arch.guest_context.debugreg[7] == 0) ) + if ( (v == curr) && + !(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) ) { write_debugreg(0, v->arch.guest_context.debugreg[0]); write_debugreg(1, v->arch.guest_context.debugreg[1]); @@ -2868,9 +2924,9 @@ long set_debugreg(struct vcpu *v, int re write_debugreg(3, v->arch.guest_context.debugreg[3]); write_debugreg(6, v->arch.guest_context.debugreg[6]); } + if ( v == curr ) + write_debugreg(7, value); } - if ( v == curr ) - write_debugreg(7, value); break; default: return -EINVAL; @@ -2887,8 +2943,19 @@ long do_set_debugreg(int reg, unsigned l unsigned long do_get_debugreg(int reg) { - if ( (reg < 0) || (reg > 7) ) return -EINVAL; - return current->arch.guest_context.debugreg[reg]; + switch ( reg ) + { + case 0 ... 3: + case 6: + return current->arch.guest_context.debugreg[reg]; + case 7: + return current->arch.guest_context.debugreg[7] | + current->arch.guest_context.debugreg[5]; + case 4 ... 5: + return current->arch.guest_context.ctrlreg[4] & X86_CR4_DE ? 
+ current->arch.guest_context.debugreg[reg + 2] : 0; + } + return -EINVAL; } /* Index: 2007-11-13/xen/include/asm-x86/debugreg.h ==================================================================--- 2007-11-13.orig/xen/include/asm-x86/debugreg.h 2005-11-17 15:51:06.000000000 +0100 +++ 2007-11-13/xen/include/asm-x86/debugreg.h 2007-11-21 09:39:32.000000000 +0100 @@ -33,11 +33,13 @@ #define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ #define DR_RW_WRITE (0x1) +#define DR_IO (0x2) #define DR_RW_READ (0x3) #define DR_LEN_1 (0x0) /* Settings for data length to trap on */ #define DR_LEN_2 (0x4) #define DR_LEN_4 (0xC) +#define DR_LEN_8 (0x8) /* The low byte to the control register determine which registers are enabled. There are 4 fields of two bits. One bit is "local", meaning @@ -53,12 +55,16 @@ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ #define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ +#define DR7_ACTIVE_MASK (DR_LOCAL_ENABLE_MASK|DR_GLOBAL_ENABLE_MASK) + /* The second byte to the control register has a few special things. We can slow the instruction pipeline for instructions coming via the gdt or the ldt if we want to. I am not sure why this is an advantage */ -#define DR_CONTROL_RESERVED (~0xFFFF03FFUL) /* Reserved by Intel */ -#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ -#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +#define DR_CONTROL_RESERVED_ZERO (~0xFFFF23FFUL) /* Reserved, read as zero */ +#define DR_CONTROL_RESERVED_ONE ( 0x00000400 ) /* Reserved, read as one */ +#define DR_LOCAL_EXACT_ENABLE ( 0x00000100 ) /* Local exact enable */ +#define DR_GLOBAL_EXACT_ENABLE ( 0x00000200 ) /* Global exact enable */ +#define DR_GENERAL_DETECT ( 0x00002000 ) /* General detect enable */ #endif /* _X86_DEBUGREG_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel