- we used the C4/C5 (first prefix) byte instead of the apparent ModR/M one as the second prefix byte - early decoding normalized vex.reg, thus corrupting it for the main consumer (copy_REX_VEX()), resulting in #UD on the two-operand instructions we emulate Also add respective test cases to the testing utility plus - fix get_fpu() (the fall-through order was inverted) - add cpu_has_avx2, even if it''s currently unused (as in the new test cases I decided to refrain from using AVX2 instructions in order to be able to actually run all the tests on the hardware I have) - slightly tweak cpu_has_avx to more consistently express the outputs we don''t care about (sinking them all into the same variable) Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x } #define cpu_has_avx ({ \ - unsigned int eax = 1, ecx = 0, edx; \ - cpuid(&eax, &edx, &ecx, &edx, NULL); \ + unsigned int eax = 1, ecx = 0; \ + cpuid(&eax, &eax, &ecx, &eax, NULL); \ if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ ecx = 0; \ (ecx & (1U << 28)) != 0; \ }) +#define cpu_has_avx2 ({ \ + unsigned int eax = 1, ebx, ecx = 0; \ + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ + ebx = 0; \ + else { \ + eax = 7, ecx = 0; \ + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ + } \ + (ebx & (1U << 5)) != 0; \ +}) + int get_fpu( void (*exception_callback)(void *, struct cpu_user_regs *), void *exception_callback_arg, @@ -111,14 +123,14 @@ int get_fpu( { case X86EMUL_FPU_fpu: break; - case X86EMUL_FPU_ymm: - if ( cpu_has_avx ) + case X86EMUL_FPU_mmx: + if ( cpu_has_mmx ) break; case X86EMUL_FPU_xmm: if ( cpu_has_sse ) break; - case X86EMUL_FPU_mmx: - if ( cpu_has_mmx ) + case X86EMUL_FPU_ymm: + if ( cpu_has_avx ) break; default: return X86EMUL_UNHANDLEABLE; @@ -629,6 +641,73 @@ int main(int argc, char **argv) else 
printf("skipped\n"); + printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); + if ( stack_exec && cpu_has_avx ) + { + extern const unsigned char vmovdqu_to_mem[]; + + asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" + ".pushsection .test, \"a\", @progbits\n" + "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" + ".popsection" :: "c" (NULL) ); + + memcpy(instr, vmovdqu_to_mem, 15); + memset(res, 0x55, 128); + memset(res + 16, 0xff, 16); + memset(res + 20, 0x00, 16); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); + if ( stack_exec && cpu_has_avx ) + { + extern const unsigned char vmovdqu_from_mem[]; + +#if 0 /* Don''t use AVX2 instructions for now */ + asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" +#else + asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" + "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" +#endif + ".pushsection .test, \"a\", @progbits\n" + "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" + ".popsection" :: "d" (NULL) ); + + memcpy(instr, vmovdqu_from_mem, 15); + memset(res + 4, 0xff, 16); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0; + regs.edx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY ) + goto fail; +#if 0 /* Don''t use AVX2 instructions for now */ + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" + "vpmovmskb %%ymm1, %0" : "=r" (rc) ); +#else + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" + "vpmovmskb %%xmm0, %0\n\t" + "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); + rc |= i << 16; +#endif + if ( rc != 0xffffffff ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + printf("%-40s", "Testing movsd 
%xmm5,(%ecx)..."); memset(res, 0x77, 64); memset(res + 10, 0x66, 8); @@ -683,6 +762,59 @@ int main(int argc, char **argv) else printf("skipped\n"); + printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); + memset(res, 0x88, 64); + memset(res + 10, 0x77, 8); + if ( stack_exec && cpu_has_avx ) + { + extern const unsigned char vmovsd_to_mem[]; + + asm volatile ( "vbroadcastsd %0, %%ymm5\n" + ".pushsection .test, \"a\", @progbits\n" + "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" + ".popsection" :: "m" (res[10]), "c" (NULL) ); + + memcpy(instr, vmovsd_to_mem, 15); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)(res + 2); + regs.edx = 0; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + goto fail; + printf("okay\n"); + } + else + { + printf("skipped\n"); + memset(res + 2, 0x77, 8); + } + + printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); + if ( stack_exec && cpu_has_avx ) + { + extern const unsigned char vmovaps_from_mem[]; + + asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" + ".pushsection .test, \"a\", @progbits\n" + "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" + ".popsection" :: "d" (NULL) ); + + memcpy(instr, vmovaps_from_mem, 15); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0; + regs.edx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY ) + goto fail; + asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" + "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); + if ( rc != 0xff ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + for ( j = 1; j <= 2; j++ ) { #if defined(__i386__) --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1454,10 +1454,10 @@ x86_emulate( /* VEX */ generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); - vex.raw[0] = b; + vex.raw[0] = modrm; if ( b & 1 ) { - vex.raw[1] = b; + vex.raw[1] = modrm; vex.opcx = vex_0f; vex.x = 1; vex.b = 1; @@ -1479,10 +1479,7 @@ x86_emulate( } } } - vex.reg ^= 
0xf; - if ( !mode_64bit() ) - vex.reg &= 0x7; - else if ( !vex.r ) + if ( mode_64bit() && !vex.r ) rex_prefix |= REX_R; fail_if(vex.opcx != vex_0f); @@ -3899,8 +3896,9 @@ x86_emulate( else { fail_if((vex.opcx != vex_0f) || - (vex.reg && ((ea.type == OP_MEM) || - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); + ((vex.reg != 0xf) && + ((ea.type == OP_MEM) || + !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); vcpu_must_have_avx(); get_fpu(X86EMUL_FPU_ymm, &fic); ea.bytes = 16 << vex.l; @@ -4168,7 +4166,7 @@ x86_emulate( } else { - fail_if((vex.opcx != vex_0f) || vex.reg || + fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); vcpu_must_have_avx(); get_fpu(X86EMUL_FPU_ymm, &fic); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Il 28/08/2013 10:32, Jan Beulich ha scritto:> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M > one as the second prefix byte > - early decoding normalized vex.reg, thus corrupting it for the main > consumer (copy_REX_VEX()), resulting in #UD on the two-operand > instructions we emulate > > Also add respective test cases to the testing utility plus > - fix get_fpu() (the fall-through order was inverted) > - add cpu_has_avx2, even if it''s currently unused (as in the new test > cases I decided to refrain from using AVX2 instructions in order to > be able to actually run all the tests on the hardware I have) > - slightly tweak cpu_has_avx to more consistently express the outputs > we don''t care about (sinking them all into the same variable)This patch include the solution for full SSE support needed to solve this problem? http://bugs.xenproject.org/xen/bug/11 Thanks for any reply.> > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/tools/tests/x86_emulator/test_x86_emulator.c > +++ b/tools/tests/x86_emulator/test_x86_emulator.c > @@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x > } > > #define cpu_has_avx ({ \ > - unsigned int eax = 1, ecx = 0, edx; \ > - cpuid(&eax, &edx, &ecx, &edx, NULL); \ > + unsigned int eax = 1, ecx = 0; \ > + cpuid(&eax, &eax, &ecx, &eax, NULL); \ > if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ > ecx = 0; \ > (ecx & (1U << 28)) != 0; \ > }) > > +#define cpu_has_avx2 ({ \ > + unsigned int eax = 1, ebx, ecx = 0; \ > + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ > + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ > + ebx = 0; \ > + else { \ > + eax = 7, ecx = 0; \ > + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ > + } \ > + (ebx & (1U << 5)) != 0; \ > +}) > + > int get_fpu( > void (*exception_callback)(void *, struct cpu_user_regs *), > void *exception_callback_arg, > @@ -111,14 +123,14 @@ int get_fpu( > { > case X86EMUL_FPU_fpu: > break; > - case X86EMUL_FPU_ymm: > - if ( cpu_has_avx ) > + case 
X86EMUL_FPU_mmx: > + if ( cpu_has_mmx ) > break; > case X86EMUL_FPU_xmm: > if ( cpu_has_sse ) > break; > - case X86EMUL_FPU_mmx: > - if ( cpu_has_mmx ) > + case X86EMUL_FPU_ymm: > + if ( cpu_has_avx ) > break; > default: > return X86EMUL_UNHANDLEABLE; > @@ -629,6 +641,73 @@ int main(int argc, char **argv) > else > printf("skipped\n"); > > + printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovdqu_to_mem[]; > + > + asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" > + ".pushsection .test, \"a\", @progbits\n" > + "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" > + ".popsection" :: "c" (NULL) ); > + > + memcpy(instr, vmovdqu_to_mem, 15); > + memset(res, 0x55, 128); > + memset(res + 16, 0xff, 16); > + memset(res + 20, 0x00, 16); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > + printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovdqu_from_mem[]; > + > +#if 0 /* Don''t use AVX2 instructions for now */ > + asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" > +#else > + asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" > + "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" > +#endif > + ".pushsection .test, \"a\", @progbits\n" > + "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" > + ".popsection" :: "d" (NULL) ); > + > + memcpy(instr, vmovdqu_from_mem, 15); > + memset(res + 4, 0xff, 16); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = 0; > + regs.edx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + if ( rc != X86EMUL_OKAY ) > + goto fail; > +#if 0 /* Don''t use AVX2 instructions for now */ > + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" > + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" > + "vpmovmskb %%ymm1, %0" : 
"=r" (rc) ); > +#else > + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" > + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" > + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" > + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" > + "vpmovmskb %%xmm0, %0\n\t" > + "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); > + rc |= i << 16; > +#endif > + if ( rc != 0xffffffff ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); > memset(res, 0x77, 64); > memset(res + 10, 0x66, 8); > @@ -683,6 +762,59 @@ int main(int argc, char **argv) > else > printf("skipped\n"); > > + printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); > + memset(res, 0x88, 64); > + memset(res + 10, 0x77, 8); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovsd_to_mem[]; > + > + asm volatile ( "vbroadcastsd %0, %%ymm5\n" > + ".pushsection .test, \"a\", @progbits\n" > + "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" > + ".popsection" :: "m" (res[10]), "c" (NULL) ); > + > + memcpy(instr, vmovsd_to_mem, 15); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = (unsigned long)(res + 2); > + regs.edx = 0; > + rc = x86_emulate(&ctxt, &emulops); > + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) > + goto fail; > + printf("okay\n"); > + } > + else > + { > + printf("skipped\n"); > + memset(res + 2, 0x77, 8); > + } > + > + printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovaps_from_mem[]; > + > + asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" > + ".pushsection .test, \"a\", @progbits\n" > + "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" > + ".popsection" :: "d" (NULL) ); > + > + memcpy(instr, vmovaps_from_mem, 15); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = 0; > + regs.edx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + if ( rc != X86EMUL_OKAY ) > + goto fail; > + asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" > + "vmovmskps %%ymm0, %0" : 
"=r" (rc) : "m" (res[8]) ); > + if ( rc != 0xff ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > for ( j = 1; j <= 2; j++ ) > { > #if defined(__i386__) > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -1454,10 +1454,10 @@ x86_emulate( > /* VEX */ > generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); > > - vex.raw[0] = b; > + vex.raw[0] = modrm; > if ( b & 1 ) > { > - vex.raw[1] = b; > + vex.raw[1] = modrm; > vex.opcx = vex_0f; > vex.x = 1; > vex.b = 1; > @@ -1479,10 +1479,7 @@ x86_emulate( > } > } > } > - vex.reg ^= 0xf; > - if ( !mode_64bit() ) > - vex.reg &= 0x7; > - else if ( !vex.r ) > + if ( mode_64bit() && !vex.r ) > rex_prefix |= REX_R; > > fail_if(vex.opcx != vex_0f); > @@ -3899,8 +3896,9 @@ x86_emulate( > else > { > fail_if((vex.opcx != vex_0f) || > - (vex.reg && ((ea.type == OP_MEM) || > - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); > + ((vex.reg != 0xf) && > + ((ea.type == OP_MEM) || > + !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); > vcpu_must_have_avx(); > get_fpu(X86EMUL_FPU_ymm, &fic); > ea.bytes = 16 << vex.l; > @@ -4168,7 +4166,7 @@ x86_emulate( > } > else > { > - fail_if((vex.opcx != vex_0f) || vex.reg || > + fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || > ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); > vcpu_must_have_avx(); > get_fpu(X86EMUL_FPU_ymm, &fic); > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 28/08/13 09:49, Fabio Fantoni wrote:> Il 28/08/2013 10:32, Jan Beulich ha scritto: >> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M >> one as the second prefix byte >> - early decoding normalized vex.reg, thus corrupting it for the main >> consumer (copy_REX_VEX()), resulting in #UD on the two-operand >> instructions we emulate >> >> Also add respective test cases to the testing utility plus >> - fix get_fpu() (the fall-through order was inverted) >> - add cpu_has_avx2, even if it''s currently unused (as in the new test >> cases I decided to refrain from using AVX2 instructions in order to >> be able to actually run all the tests on the hardware I have) >> - slightly tweak cpu_has_avx to more consistently express the outputs >> we don''t care about (sinking them all into the same variable) > > This patch include the solution for full SSE support needed to solve > this problem? > http://bugs.xenproject.org/xen/bug/11 > > Thanks for any reply.No it doesn''t. Bug #11 is a problem with the communication between Xen and Qemu. This fixes some bugs with Xens emulation of instructions, irrespective of Qemu. 
~Andrew> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> >> --- a/tools/tests/x86_emulator/test_x86_emulator.c >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c >> @@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x >> } >> >> #define cpu_has_avx ({ \ >> - unsigned int eax = 1, ecx = 0, edx; \ >> - cpuid(&eax, &edx, &ecx, &edx, NULL); \ >> + unsigned int eax = 1, ecx = 0; \ >> + cpuid(&eax, &eax, &ecx, &eax, NULL); \ >> if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> ecx = 0; \ >> (ecx & (1U << 28)) != 0; \ >> }) >> >> +#define cpu_has_avx2 ({ \ >> + unsigned int eax = 1, ebx, ecx = 0; \ >> + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ >> + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> + ebx = 0; \ >> + else { \ >> + eax = 7, ecx = 0; \ >> + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ >> + } \ >> + (ebx & (1U << 5)) != 0; \ >> +}) >> + >> int get_fpu( >> void (*exception_callback)(void *, struct cpu_user_regs *), >> void *exception_callback_arg, >> @@ -111,14 +123,14 @@ int get_fpu( >> { >> case X86EMUL_FPU_fpu: >> break; >> - case X86EMUL_FPU_ymm: >> - if ( cpu_has_avx ) >> + case X86EMUL_FPU_mmx: >> + if ( cpu_has_mmx ) >> break; >> case X86EMUL_FPU_xmm: >> if ( cpu_has_sse ) >> break; >> - case X86EMUL_FPU_mmx: >> - if ( cpu_has_mmx ) >> + case X86EMUL_FPU_ymm: >> + if ( cpu_has_avx ) >> break; >> default: >> return X86EMUL_UNHANDLEABLE; >> @@ -629,6 +641,73 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovdqu_to_mem[]; >> + >> + asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" >> + ".popsection" :: "c" (NULL) ); >> + >> + memcpy(instr, vmovdqu_to_mem, 15); >> + memset(res, 0x55, 128); >> + memset(res + 16, 0xff, 16); >> + memset(res + 20, 0x00, 16); >> + regs.eip = (unsigned 
long)&instr[0]; >> + regs.ecx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> + printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovdqu_from_mem[]; >> + >> +#if 0 /* Don''t use AVX2 instructions for now */ >> + asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" >> +#else >> + asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" >> + "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" >> +#endif >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" >> + ".popsection" :: "d" (NULL) ); >> + >> + memcpy(instr, vmovdqu_from_mem, 15); >> + memset(res + 4, 0xff, 16); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = 0; >> + regs.edx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY ) >> + goto fail; >> +#if 0 /* Don''t use AVX2 instructions for now */ >> + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" >> + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" >> + "vpmovmskb %%ymm1, %0" : "=r" (rc) ); >> +#else >> + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" >> + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" >> + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" >> + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" >> + "vpmovmskb %%xmm0, %0\n\t" >> + "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); >> + rc |= i << 16; >> +#endif >> + if ( rc != 0xffffffff ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); >> memset(res, 0x77, 64); >> memset(res + 10, 0x66, 8); >> @@ -683,6 +762,59 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); >> + memset(res, 0x88, 64); >> + memset(res + 10, 0x77, 8); >> + if ( stack_exec && cpu_has_avx ) >> + { 
>> + extern const unsigned char vmovsd_to_mem[]; >> + >> + asm volatile ( "vbroadcastsd %0, %%ymm5\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" >> + ".popsection" :: "m" (res[10]), "c" (NULL) ); >> + >> + memcpy(instr, vmovsd_to_mem, 15); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = (unsigned long)(res + 2); >> + regs.edx = 0; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + { >> + printf("skipped\n"); >> + memset(res + 2, 0x77, 8); >> + } >> + >> + printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovaps_from_mem[]; >> + >> + asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" >> + ".popsection" :: "d" (NULL) ); >> + >> + memcpy(instr, vmovaps_from_mem, 15); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = 0; >> + regs.edx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY ) >> + goto fail; >> + asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" >> + "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); >> + if ( rc != 0xff ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> for ( j = 1; j <= 2; j++ ) >> { >> #if defined(__i386__) >> --- a/xen/arch/x86/x86_emulate/x86_emulate.c >> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c >> @@ -1454,10 +1454,10 @@ x86_emulate( >> /* VEX */ >> generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); >> >> - vex.raw[0] = b; >> + vex.raw[0] = modrm; >> if ( b & 1 ) >> { >> - vex.raw[1] = b; >> + vex.raw[1] = modrm; >> vex.opcx = vex_0f; >> vex.x = 1; >> vex.b = 1; >> @@ -1479,10 +1479,7 @@ x86_emulate( >> } >> } >> } >> - vex.reg ^= 0xf; >> - if ( !mode_64bit() ) >> - vex.reg &= 0x7; >> - else if ( !vex.r ) >> + if 
( mode_64bit() && !vex.r ) >> rex_prefix |= REX_R; >> >> fail_if(vex.opcx != vex_0f); >> @@ -3899,8 +3896,9 @@ x86_emulate( >> else >> { >> fail_if((vex.opcx != vex_0f) || >> - (vex.reg && ((ea.type == OP_MEM) || >> - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); >> + ((vex.reg != 0xf) && >> + ((ea.type == OP_MEM) || >> + !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); >> vcpu_must_have_avx(); >> get_fpu(X86EMUL_FPU_ymm, &fic); >> ea.bytes = 16 << vex.l; >> @@ -4168,7 +4166,7 @@ x86_emulate( >> } >> else >> { >> - fail_if((vex.opcx != vex_0f) || vex.reg || >> + fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || >> ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); >> vcpu_must_have_avx(); >> get_fpu(X86EMUL_FPU_ymm, &fic); >> >> >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 28/08/2013 09:32, "Jan Beulich" <JBeulich@suse.com> wrote:> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M > one as the second prefix byte > - early decoding normalized vex.reg, thus corrupting it for the main > consumer (copy_REX_VEX()), resulting in #UD on the two-operand > instructions we emulate > > Also add respective test cases to the testing utility plus > - fix get_fpu() (the fall-through order was inverted) > - add cpu_has_avx2, even if it's currently unused (as in the new test > cases I decided to refrain from using AVX2 instructions in order to > be able to actually run all the tests on the hardware I have) > - slightly tweak cpu_has_avx to more consistently express the outputs > we don't care about (sinking them all into the same variable) > > Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>