1: x86/HVM: properly handle MMIO reads and writes wider than a machine word 2: x86: fix dependencies of emulator test 3: x86_emulate: MOVSXD must read source operand just once 4: x86_emulate: PUSH <mem> must read source operand just once 5: x86_emulate: fix flag setting for 8-bit signed multiplication 6: x86_emulate: fold wide reads Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Sep-19 15:42 UTC
[PATCH 1/6] x86/HVM: properly handle MMIO reads and writes wider than a machine word
Just like real hardware we ought to split such accesses transparently to the caller. With little extra effort we can at once even handle page crossing accesses correctly. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -438,6 +438,7 @@ static int __hvmemul_read( { struct vcpu *curr = current; unsigned long addr, reps = 1; + unsigned int off, chunk = min_t(unsigned int, bytes, sizeof(long)); uint32_t pfec = PFEC_page_present; struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; paddr_t gpa; @@ -447,16 +448,38 @@ static int __hvmemul_read( seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr); if ( rc != X86EMUL_OKAY ) return rc; + off = addr & (PAGE_SIZE - 1); + /* + * We only need to handle sizes actual instruction operands can have. All + * such sizes are either powers of 2 or the sum of two powers of 2. Thus + * picking as initial chunk size the largest power of 2 not greater than + * the total size will always result in only power-of-2 size requests + * issued to hvmemul_do_mmio() (hvmemul_do_io() rejects non-powers-of-2). 
+ */ + while ( chunk & (chunk - 1) ) + chunk &= chunk - 1; + if ( off + bytes > PAGE_SIZE ) + while ( off & (chunk - 1) ) + chunk >>= 1; if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) { - unsigned int off = addr & (PAGE_SIZE - 1); if ( access_type == hvm_access_insn_fetch ) return X86EMUL_UNHANDLEABLE; gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); - if ( (off + bytes) <= PAGE_SIZE ) - return hvmemul_do_mmio(gpa, &reps, bytes, 0, - IOREQ_READ, 0, p_data); + while ( (off + chunk) <= PAGE_SIZE ) + { + rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_READ, 0, p_data); + if ( rc != X86EMUL_OKAY || bytes == chunk ) + return rc; + addr += chunk; + off += chunk; + gpa += chunk; + p_data += chunk; + bytes -= chunk; + if ( bytes < chunk ) + chunk = bytes; + } } if ( (seg != x86_seg_none) && @@ -473,14 +496,32 @@ static int __hvmemul_read( return X86EMUL_EXCEPTION; case HVMCOPY_unhandleable: return X86EMUL_UNHANDLEABLE; - case HVMCOPY_bad_gfn_to_mfn: + case HVMCOPY_bad_gfn_to_mfn: if ( access_type == hvm_access_insn_fetch ) return X86EMUL_UNHANDLEABLE; - rc = hvmemul_linear_to_phys( - addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt); - if ( rc != X86EMUL_OKAY ) - return rc; - return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data); + rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, + hvmemul_ctxt); + while ( rc == X86EMUL_OKAY ) + { + rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_READ, 0, p_data); + if ( rc != X86EMUL_OKAY || bytes == chunk ) + break; + addr += chunk; + off += chunk; + p_data += chunk; + bytes -= chunk; + if ( bytes < chunk ) + chunk = bytes; + if ( off < PAGE_SIZE ) + gpa += chunk; + else + { + rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, + hvmemul_ctxt); + off = 0; + } + } + return rc; case HVMCOPY_gfn_paged_out: return X86EMUL_RETRY; case HVMCOPY_gfn_shared: @@ -537,6 +578,7 @@ static int hvmemul_write( container_of(ctxt, struct hvm_emulate_ctxt, ctxt); struct vcpu *curr = current; 
unsigned long addr, reps = 1; + unsigned int off, chunk = min_t(unsigned int, bytes, sizeof(long)); uint32_t pfec = PFEC_page_present | PFEC_write_access; struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; paddr_t gpa; @@ -546,14 +588,30 @@ static int hvmemul_write( seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr); if ( rc != X86EMUL_OKAY ) return rc; + off = addr & (PAGE_SIZE - 1); + /* See the respective comment in __hvmemul_read(). */ + while ( chunk & (chunk - 1) ) + chunk &= chunk - 1; + if ( off + bytes > PAGE_SIZE ) + while ( off & (chunk - 1) ) + chunk >>= 1; if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) { - unsigned int off = addr & (PAGE_SIZE - 1); gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); - if ( (off + bytes) <= PAGE_SIZE ) - return hvmemul_do_mmio(gpa, &reps, bytes, 0, - IOREQ_WRITE, 0, p_data); + while ( (off + chunk) <= PAGE_SIZE ) + { + rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_WRITE, 0, p_data); + if ( rc != X86EMUL_OKAY || bytes == chunk ) + return rc; + addr += chunk; + off += chunk; + gpa += chunk; + p_data += chunk; + bytes -= chunk; + if ( bytes < chunk ) + chunk = bytes; + } } if ( (seg != x86_seg_none) && @@ -569,12 +627,29 @@ static int hvmemul_write( case HVMCOPY_unhandleable: return X86EMUL_UNHANDLEABLE; case HVMCOPY_bad_gfn_to_mfn: - rc = hvmemul_linear_to_phys( - addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt); - if ( rc != X86EMUL_OKAY ) - return rc; - return hvmemul_do_mmio(gpa, &reps, bytes, 0, - IOREQ_WRITE, 0, p_data); + rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, + hvmemul_ctxt); + while ( rc == X86EMUL_OKAY ) + { + rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_WRITE, 0, p_data); + if ( rc != X86EMUL_OKAY || bytes == chunk ) + break; + addr += chunk; + off += chunk; + p_data += chunk; + bytes -= chunk; + if ( bytes < chunk ) + chunk = bytes; + if ( off < PAGE_SIZE ) + gpa += chunk; + else + { + rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, 
pfec, + hvmemul_ctxt); + off = 0; + } + } + return rc; case HVMCOPY_gfn_paged_out: return X86EMUL_RETRY; case HVMCOPY_gfn_shared: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Rather than mentioning the (linked) directory, mention the files thus making sure things get rebuilt as needed when the core emulator files change. Also enable debug info generation unconditionally, as this is testing stuff only anyway. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/tools/tests/x86_emulator/Makefile +++ b/tools/tests/x86_emulator/Makefile @@ -37,14 +37,13 @@ clean: .PHONY: install install: -.PHONY: x86_emulate -x86_emulate: +x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h: [ -L x86_emulate ] || ln -sf $(XEN_ROOT)/xen/arch/x86/x86_emulate . HOSTCFLAGS += $(CFLAGS_xeninclude) -x86_emulate.o: x86_emulate.c x86_emulate - $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $< +x86_emulate.o: x86_emulate.c x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h + $(HOSTCC) $(HOSTCFLAGS) -c -g -o $@ $< -test_x86_emulator.o: test_x86_emulator.c blowfish.h x86_emulate - $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $< +test_x86_emulator.o: test_x86_emulator.c blowfish.h x86_emulate/x86_emulate.h + $(HOSTCC) $(HOSTCFLAGS) -c -g -o $@ $< _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Sep-19 15:44 UTC
[PATCH 3/6] x86_emulate: MOVSXD must read source operand just once
... for the case of accessing MMIO. Also streamline the ARPL emulation a little, and add tests for both instructions (the MOVSXD one requires a few other adjustments, as we now need to run in a mode where the emulator''s mode_64bit() returns true). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -21,6 +21,8 @@ #define EFLG_PF (1<<2) #define EFLG_CF (1<<0) +static unsigned int bytes_read; + static int read( unsigned int seg, unsigned long offset, @@ -28,6 +30,18 @@ static int read( unsigned int bytes, struct x86_emulate_ctxt *ctxt) { + bytes_read += bytes; + memcpy(p_data, (void *)offset, bytes); + return X86EMUL_OKAY; +} + +static int fetch( + unsigned int seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + struct x86_emulate_ctxt *ctxt) +{ memcpy(p_data, (void *)offset, bytes); return X86EMUL_OKAY; } @@ -140,7 +154,7 @@ int get_fpu( static struct x86_emulate_ops emulops = { .read = read, - .insn_fetch = read, + .insn_fetch = fetch, .write = write, .cmpxchg = cmpxchg, .cpuid = cpuid, @@ -162,8 +176,8 @@ int main(int argc, char **argv) ctxt.regs = ®s; ctxt.force_writeback = 0; - ctxt.addr_size = 32; - ctxt.sp_size = 32; + ctxt.addr_size = 8 * sizeof(void *); + ctxt.sp_size = 8 * sizeof(void *); res = mmap((void *)0x100000, MMAP_SZ, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); @@ -428,6 +442,44 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); +#ifndef __x86_64__ + printf("%-40s", "Testing arpl %cx,(%%eax)..."); + instr[0] = 0x63; instr[1] = 0x08; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0x22222222; + regs.eax = (unsigned long)res; + *res = 0x33331111; + bytes_read = 0; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x33331112) || + (regs.ecx != 0x22222222) || + !(regs.eflags & EFLG_ZF) || + (regs.eip != (unsigned long)&instr[2]) ) + goto 
fail; +#else + printf("%-40s", "Testing movsxd (%%rax),%%rcx..."); + instr[0] = 0x48; instr[1] = 0x63; instr[2] = 0x08; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0x123456789abcdef; + regs.eax = (unsigned long)res; + *res = 0xfedcba98; + bytes_read = 0; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0xfedcba98) || + (regs.ecx != 0xfffffffffedcba98) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + if ( bytes_read != 4 ) + { + printf("%u bytes read - ", bytes_read); + goto fail; + } +#endif + printf("okay\n"); + printf("%-40s", "Testing xadd %%ax,(%%ecx)..."); instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc1; instr[3] = 0x01; regs.eflags = 0x200; @@ -445,7 +497,11 @@ int main(int argc, char **argv) printf("okay\n"); printf("%-40s", "Testing dec %%ax..."); +#ifndef __x86_64__ instr[0] = 0x66; instr[1] = 0x48; +#else + instr[0] = 0x66; instr[1] = 0xff; instr[2] = 0xc8; +#endif regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.eax = 0x00000000; @@ -453,7 +509,7 @@ int main(int argc, char **argv) if ( (rc != X86EMUL_OKAY) || (regs.eax != 0x0000ffff) || ((regs.eflags&0x240) != 0x200) || - (regs.eip != (unsigned long)&instr[2]) ) + (regs.eip != (unsigned long)&instr[2 + (ctxt.addr_size > 32)]) ) goto fail; printf("okay\n"); @@ -821,6 +877,8 @@ int main(int argc, char **argv) if ( j == 2 ) break; memcpy(res, blowfish32_code, sizeof(blowfish32_code)); #else + ctxt.addr_size = 16 << j; + ctxt.sp_size = 16 << j; memcpy(res, (j == 1) ? blowfish32_code : blowfish64_code, (j == 1) ? 
sizeof(blowfish32_code) : sizeof(blowfish64_code)); #endif --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -92,7 +92,7 @@ static uint8_t opcode_table[256] = { ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, /* 0x60 - 0x67 */ - ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcMem16|ModRM|Mov, + ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcNone|ModRM|Mov, 0, 0, 0, 0, /* 0x68 - 0x6F */ ImplicitOps|Mov, DstReg|SrcImm|ModRM|Mov, @@ -2021,9 +2021,9 @@ x86_emulate( if ( mode_64bit() ) { /* movsxd */ - if ( src.type == OP_REG ) - src.val = *(int32_t *)src.reg; - else if ( (rc = read_ulong(src.mem.seg, src.mem.off, + if ( ea.type == OP_REG ) + src.val = *ea.reg; + else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, 4, ctxt, ops)) ) goto done; dst.val = (int32_t)src.val; @@ -2031,14 +2031,25 @@ x86_emulate( else { /* arpl */ - uint16_t src_val = dst.val; - dst = src; - _regs.eflags &= ~EFLG_ZF; - _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? EFLG_ZF : 0; - if ( _regs.eflags & EFLG_ZF ) - dst.val = (dst.val & ~3) | (src_val & 3); + unsigned int src_rpl = dst.val & 3; + + dst = ea; + dst.bytes = 2; + if ( dst.type == OP_REG ) + dst.val = *dst.reg; + else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off, + &dst.val, 2, ctxt, ops)) ) + goto done; + if ( src_rpl > (dst.val & 3) ) + { + _regs.eflags |= EFLG_ZF; + dst.val = (dst.val & ~3) | src_rpl; + } else + { + _regs.eflags &= ~EFLG_ZF; dst.type = OP_NONE; + } generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1); } break; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Sep-19 15:44 UTC
[PATCH 4/6] x86_emulate: PUSH <mem> must read source operand just once
... for the case of accessing MMIO. Rather than doing the early operand type adjustment for just for that case, do it for all of the 0xF6, 0xF7, and 0xFF groups (allowing some other code to be dropped instead). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1632,6 +1632,41 @@ x86_emulate( if ( override_seg != -1 ) ea.mem.seg = override_seg; + /* Early operand adjustments. */ + if ( !twobyte ) + switch ( b ) + { + case 0xf6 ... 0xf7: /* Grp3 */ + switch ( modrm_reg & 7 ) + { + case 0 ... 1: /* test */ + d = (d & ~SrcMask) | SrcImm; + break; + case 4: /* mul */ + case 5: /* imul */ + case 6: /* div */ + case 7: /* idiv */ + d = (d & (ByteOp | ModRM)) | DstImplicit | SrcMem; + break; + } + break; + case 0xff: /* Grp5 */ + switch ( modrm_reg & 7 ) + { + case 2: /* call (near) */ + case 4: /* jmp (near) */ + case 6: /* push */ + if ( mode_64bit() && op_bytes == 4 ) + op_bytes = 8; + /* fall through */ + case 3: /* call (far, absolute indirect) */ + case 5: /* jmp (far, absolute indirect) */ + d = DstNone|SrcMem|ModRM; + break; + } + break; + } + /* Decode and fetch the source operand: register, memory or immediate. */ switch ( d & SrcMask ) { @@ -3318,16 +3353,6 @@ x86_emulate( switch ( modrm_reg & 7 ) { case 0 ... 1: /* test */ - /* Special case in Grp3: test has an immediate source operand. */ - src.type = OP_IMM; - src.bytes = (d & ByteOp) ? 
1 : op_bytes; - if ( src.bytes == 8 ) src.bytes = 4; - switch ( src.bytes ) - { - case 1: src.val = insn_fetch_type(int8_t); break; - case 2: src.val = insn_fetch_type(int16_t); break; - case 4: src.val = insn_fetch_type(int32_t); break; - } goto test; case 2: /* not */ dst.val = ~dst.val; @@ -3336,12 +3361,11 @@ x86_emulate( emulate_1op("neg", dst, _regs.eflags); break; case 4: /* mul */ - src = dst; dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; dst.val = *dst.reg; _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( src.bytes ) + switch ( dst.bytes = src.bytes ) { case 1: dst.val = (uint8_t)dst.val; @@ -3377,12 +3401,11 @@ x86_emulate( } break; case 5: /* imul */ - src = dst; dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; dst.val = *dst.reg; _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( src.bytes ) + switch ( dst.bytes = src.bytes ) { case 1: dst.val = ((uint16_t)(int8_t)src.val * @@ -3419,10 +3442,10 @@ x86_emulate( break; case 6: /* div */ { unsigned long u[2], v; - src = dst; + dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; - switch ( src.bytes ) + switch ( dst.bytes = src.bytes ) { case 1: u[0] = (uint16_t)_regs.eax; @@ -3469,10 +3492,10 @@ x86_emulate( } case 7: /* idiv */ { unsigned long u[2], v; - src = dst; + dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; - switch ( src.bytes ) + switch ( dst.bytes = src.bytes ) { case 1: u[0] = (int16_t)_regs.eax; @@ -3564,20 +3587,13 @@ x86_emulate( emulate_1op("dec", dst, _regs.eflags); break; case 2: /* call (near) */ + dst.val = _regs.eip; + _regs.eip = src.val; + src.val = dst.val; + goto push; + break; case 4: /* jmp (near) */ - if ( (dst.bytes == 4) && mode_64bit() ) - { - dst.bytes = op_bytes = 8; - if ( dst.type == OP_REG ) - dst.val = *dst.reg; - else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off, - &dst.val, 8, ctxt, ops)) != 0 ) - goto done; - } - src.val = _regs.eip; - _regs.eip = dst.val; - if ( (modrm_reg & 7) == 2 ) - goto push; /* call */ + _regs.eip = src.val; 
dst.type = OP_NONE; break; case 3: /* call (far, absolute indirect) */ @@ -3610,21 +3626,7 @@ x86_emulate( break; } case 6: /* push */ - /* 64-bit mode: PUSH defaults to a 64-bit operand. */ - if ( mode_64bit() && (dst.bytes == 4) ) - { - dst.bytes = 8; - if ( dst.type == OP_REG ) - dst.val = *dst.reg; - else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off, - &dst.val, 8, ctxt, ops)) != 0 ) - goto done; - } - if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), - &dst.val, dst.bytes, ctxt)) != 0 ) - goto done; - dst.type = OP_NONE; - break; + goto push; case 7: generate_exception_if(1, EXC_UD, -1); default: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Sep-19 15:45 UTC
[PATCH 5/6] x86_emulate: fix flag setting for 8-bit signed multiplication
We really need to check for a signed overflow of 8 bits, while the previous check compared the sign-extended 8-bit result with the zero-extended 16-bit one (which was wrong for all negative results). Once at it - also adjust the 16-bit comparison for symmetry - improve the 8-bit multiplication (no need to zero-extend to 32-bits the sign-extended to 16 bits original 8-bit value) - fold both signed multiplication variants Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -2096,40 +2096,13 @@ x86_emulate( goto push; case 0x69: /* imul imm16/32 */ - case 0x6b: /* imul imm8 */ { - unsigned long src1; /* ModR/M source operand */ + case 0x6b: /* imul imm8 */ if ( ea.type == OP_REG ) - src1 = *ea.reg; + dst.val = *ea.reg; else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, - &src1, op_bytes, ctxt, ops)) ) + &dst.val, op_bytes, ctxt, ops)) ) goto done; - _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( dst.bytes ) - { - case 2: - dst.val = ((uint32_t)(int16_t)src.val * - (uint32_t)(int16_t)src1); - if ( (int16_t)dst.val != (uint32_t)dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - break; -#ifdef __x86_64__ - case 4: - dst.val = ((uint64_t)(int32_t)src.val * - (uint64_t)(int32_t)src1); - if ( (int32_t)dst.val != dst.val ) - _regs.eflags |= EFLG_OF|EFLG_CF; - break; -#endif - default: { - unsigned long m[2] = { src.val, src1 }; - if ( imul_dbl(m) ) - _regs.eflags |= EFLG_OF|EFLG_CF; - dst.val = m[0]; - break; - } - } - break; - } + goto imul; case 0x6a: /* push imm8 */ src.val = insn_fetch_type(int8_t); @@ -3404,22 +3377,25 @@ x86_emulate( dst.type = OP_REG; dst.reg = (unsigned long *)&_regs.eax; dst.val = *dst.reg; + dst.bytes = src.bytes; + imul: _regs.eflags &= ~(EFLG_OF|EFLG_CF); - switch ( dst.bytes = src.bytes ) + switch ( dst.bytes ) { case 1: - dst.val = ((uint16_t)(int8_t)src.val * - (uint16_t)(int8_t)dst.val); - if ( (int8_t)dst.val != (uint16_t)dst.val ) + dst.val = (int8_t)src.val 
* (int8_t)dst.val; + if ( (int8_t)dst.val != (int16_t)dst.val ) _regs.eflags |= EFLG_OF|EFLG_CF; - dst.bytes = 2; + if ( b > 0x6b ) + dst.bytes = 2; break; case 2: dst.val = ((uint32_t)(int16_t)src.val * (uint32_t)(int16_t)dst.val); - if ( (int16_t)dst.val != (uint32_t)dst.val ) + if ( (int16_t)dst.val != (int32_t)dst.val ) _regs.eflags |= EFLG_OF|EFLG_CF; - *(uint16_t *)&_regs.edx = dst.val >> 16; + if ( b > 0x6b ) + *(uint16_t *)&_regs.edx = dst.val >> 16; break; #ifdef __x86_64__ case 4: @@ -3427,14 +3403,16 @@ x86_emulate( (uint64_t)(int32_t)dst.val); if ( (int32_t)dst.val != dst.val ) _regs.eflags |= EFLG_OF|EFLG_CF; - _regs.edx = (uint32_t)(dst.val >> 32); + if ( b > 0x6b ) + _regs.edx = (uint32_t)(dst.val >> 32); break; #endif default: { unsigned long m[2] = { src.val, dst.val }; if ( imul_dbl(m) ) _regs.eflags |= EFLG_OF|EFLG_CF; - _regs.edx = m[1]; + if ( b > 0x6b ) + _regs.edx = m[1]; dst.val = m[0]; break; } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
With HVM's MMIO operand handling now being capable of splitting large reads, there's no need to issue at most machine word size reads when we really need wider operands. Note that this is not done everywhere - there are a couple of cases where keeping the reads separate is more natural (and folding them would complicate the code rather than simplifying it). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1116,7 +1116,6 @@ protmode_load_seg( { struct segment_register desctab, ss, segr; struct { uint32_t a, b; } desc; - unsigned long val; uint8_t dpl, rpl, cpl; uint32_t new_desc_b, a_flag = 0x100; int rc, fault_type = EXC_GP; @@ -1143,14 +1142,9 @@ protmode_load_seg( if ( ((sel & 0xfff8) + 7) > desctab.limit ) goto raise_exn; - if ( (rc = read_ulong(x86_seg_none, desctab.base + (sel & 0xfff8), - &val, 4, ctxt, ops)) ) + if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8), + &desc, sizeof(desc), ctxt)) ) return rc; - desc.a = val; - if ( (rc = read_ulong(x86_seg_none, desctab.base + (sel & 0xfff8) + 4, - &val, 4, ctxt, ops)) ) - return rc; - desc.b = val; /* Segment present in memory? */ if ( !(desc.b & (1u<<15)) ) @@ -4454,7 +4448,6 @@ x86_emulate( case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ { unsigned long old[2], exp[2], new[2]; - unsigned int i; generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); @@ -4463,10 +4456,9 @@ x86_emulate( op_bytes *= 2; /* Get actual old value. */ - for ( i = 0; i < (op_bytes/sizeof(long)); i++ ) - if ( (rc = read_ulong(ea.mem.seg, ea.mem.off + i*sizeof(long), - &old[i], sizeof(long), ctxt, ops)) != 0 ) - goto done; + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes, + ctxt)) != 0 ) + goto done; /* Get expected and proposed values. 
*/ if ( op_bytes == 8 ) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 19/09/2013 16:40, "Jan Beulich" <JBeulich@suse.com> wrote:> 1: x86/HVM: properly handle MMIO reads and writes wider than a machine word > 2: x86: fix dependencies of emulator test > 3: x86_emulate: MOVSXD must read source operand just once > 4: x86_emulate: PUSH <mem> must read source operand just once > 5: x86_emulate: fix flag setting for 8-bit signed multiplication > 6: x86_emulate: fold wide reads > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Nice. Acked-by: Keir Fraser <keir@xen.org>
>>> On 19.09.13 at 18:12, Keir Fraser <keir.xen@gmail.com> wrote: > On 19/09/2013 16:40, "Jan Beulich" <JBeulich@suse.com> wrote: > >> 1: x86/HVM: properly handle MMIO reads and writes wider than a machine word >> 2: x86: fix dependencies of emulator test >> 3: x86_emulate: MOVSXD must read source operand just once >> 4: x86_emulate: PUSH <mem> must read source operand just once >> 5: x86_emulate: fix flag setting for 8-bit signed multiplication >> 6: x86_emulate: fold wide reads >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> > > Nice. > > Acked-by: Keir Fraser <keir@xen.org> I'm going to commit them with two tiny adjustments: In the first patch I simplified the initializer of "chunk" to chunk = min(bytes, 1U << LONG_BYTEORDER) And in the fourth patch I replaced the newly added conditional in the 8-bit case with an ASSERT() (which in turn requires the test utility to be adjusted to have ASSERT() available). I'm sure you're fine with both. There's going to be one more, fixing wrap around handling for repeated string instructions; need to still give this some minimal testing before submitting. Jan
On 20/09/2013 09:21, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 19.09.13 at 18:12, Keir Fraser <keir.xen@gmail.com> wrote: >> On 19/09/2013 16:40, "Jan Beulich" <JBeulich@suse.com> wrote: >> >>> 1: x86/HVM: properly handle MMIO reads and writes wider than a machine word >>> 2: x86: fix dependencies of emulator test >>> 3: x86_emulate: MOVSXD must read source operand just once >>> 4: x86_emulate: PUSH <mem> must read source operand just once >>> 5: x86_emulate: fix flag setting for 8-bit signed multiplication >>> 6: x86_emulate: fold wide reads >>> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> >> Nice. >> >> Acked-by: Keir Fraser <keir@xen.org> > > I'm going to commit them with two tiny adjustments: > > In the first patch I simplified the initializer of "chunk" to > > chunk = min(bytes, 1U << LONG_BYTEORDER) > > And in the fourth patch I replaced the newly added conditional in > the 8-bit case with an ASSERT() (which in turn requires the test > utility to be adjusted to have ASSERT() available). > > I'm sure you're fine with both. Yes, fine! -- Keir > There's going to be one more, fixing wrap around handling for > repeated string instructions; need to still give this some minimal > testing before submitting. > > Jan >