- we used the C4/C5 (first prefix) byte instead of the apparent ModR/M
one as the second prefix byte
- early decoding normalized vex.reg, thus corrupting it for the main
consumer (copy_REX_VEX()), resulting in #UD on the two-operand
instructions we emulate
Also add respective test cases to the testing utility plus
- fix get_fpu() (the fall-through order was inverted)
- add cpu_has_avx2, even if it's currently unused (as in the new test
cases I decided to refrain from using AVX2 instructions in order to
be able to actually run all the tests on the hardware I have)
- slightly tweak cpu_has_avx to more consistently express the outputs
  we don't care about (sinking them all into the same variable)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x
}
#define cpu_has_avx ({ \
- unsigned int eax = 1, ecx = 0, edx; \
- cpuid(&eax, &edx, &ecx, &edx, NULL); \
+ unsigned int eax = 1, ecx = 0; \
+ cpuid(&eax, &eax, &ecx, &eax, NULL); \
if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
ecx = 0; \
(ecx & (1U << 28)) != 0; \
})
+#define cpu_has_avx2 ({ \
+ unsigned int eax = 1, ebx, ecx = 0; \
+ cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+ if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+ ebx = 0; \
+ else { \
+ eax = 7, ecx = 0; \
+ cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+ } \
+ (ebx & (1U << 5)) != 0; \
+})
+
int get_fpu(
void (*exception_callback)(void *, struct cpu_user_regs *),
void *exception_callback_arg,
@@ -111,14 +123,14 @@ int get_fpu(
{
case X86EMUL_FPU_fpu:
break;
- case X86EMUL_FPU_ymm:
- if ( cpu_has_avx )
+ case X86EMUL_FPU_mmx:
+ if ( cpu_has_mmx )
break;
case X86EMUL_FPU_xmm:
if ( cpu_has_sse )
break;
- case X86EMUL_FPU_mmx:
- if ( cpu_has_mmx )
+ case X86EMUL_FPU_ymm:
+ if ( cpu_has_avx )
break;
default:
return X86EMUL_UNHANDLEABLE;
@@ -629,6 +641,73 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)...");
+ if ( stack_exec && cpu_has_avx )
+ {
+ extern const unsigned char vmovdqu_to_mem[];
+
+ asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+ "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n"
+ ".popsection" :: "c" (NULL) );
+
+ memcpy(instr, vmovdqu_to_mem, 15);
+ memset(res, 0x55, 128);
+ memset(res + 16, 0xff, 16);
+ memset(res + 20, 0x00, 16);
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing vmovdqu (%edx),%ymm4...");
+ if ( stack_exec && cpu_has_avx )
+ {
+ extern const unsigned char vmovdqu_from_mem[];
+
+#if 0 /* Don't use AVX2 instructions for now */
+ asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
+#else
+ asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
+ "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
+#endif
+                       ".pushsection .test, \"a\", @progbits\n"
+ "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n"
+ ".popsection" :: "d" (NULL) );
+
+ memcpy(instr, vmovdqu_from_mem, 15);
+ memset(res + 4, 0xff, 16);
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = 0;
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY )
+ goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+ asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+ "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+ "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+#else
+ asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+ "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+ "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+ "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+ "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+ if ( rc != 0xffffffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing movsd %xmm5,(%ecx)...");
memset(res, 0x77, 64);
memset(res + 10, 0x66, 8);
@@ -683,6 +762,59 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing vmovsd %xmm5,(%ecx)...");
+ memset(res, 0x88, 64);
+ memset(res + 10, 0x77, 8);
+ if ( stack_exec && cpu_has_avx )
+ {
+ extern const unsigned char vmovsd_to_mem[];
+
+ asm volatile ( "vbroadcastsd %0, %%ymm5\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+ "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n"
+                       ".popsection" :: "m" (res[10]), "c" (NULL) );
+
+ memcpy(instr, vmovsd_to_mem, 15);
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = (unsigned long)(res + 2);
+ regs.edx = 0;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ {
+ printf("skipped\n");
+ memset(res + 2, 0x77, 8);
+ }
+
+ printf("%-40s", "Testing vmovaps (%edx),%ymm7...");
+ if ( stack_exec && cpu_has_avx )
+ {
+ extern const unsigned char vmovaps_from_mem[];
+
+ asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+ "vmovaps_from_mem: vmovaps (%0), %%ymm7\n"
+ ".popsection" :: "d" (NULL) );
+
+ memcpy(instr, vmovaps_from_mem, 15);
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = 0;
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY )
+ goto fail;
+ asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t"
+              "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0xff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
for ( j = 1; j <= 2; j++ )
{
#if defined(__i386__)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1454,10 +1454,10 @@ x86_emulate(
/* VEX */
generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
- vex.raw[0] = b;
+ vex.raw[0] = modrm;
if ( b & 1 )
{
- vex.raw[1] = b;
+ vex.raw[1] = modrm;
vex.opcx = vex_0f;
vex.x = 1;
vex.b = 1;
@@ -1479,10 +1479,7 @@ x86_emulate(
}
}
}
- vex.reg ^= 0xf;
- if ( !mode_64bit() )
- vex.reg &= 0x7;
- else if ( !vex.r )
+ if ( mode_64bit() && !vex.r )
rex_prefix |= REX_R;
fail_if(vex.opcx != vex_0f);
@@ -3899,8 +3896,9 @@ x86_emulate(
else
{
fail_if((vex.opcx != vex_0f) ||
- (vex.reg && ((ea.type == OP_MEM) ||
- !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
+ ((vex.reg != 0xf) &&
+ ((ea.type == OP_MEM) ||
+ !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
vcpu_must_have_avx();
get_fpu(X86EMUL_FPU_ymm, &fic);
ea.bytes = 16 << vex.l;
@@ -4168,7 +4166,7 @@ x86_emulate(
}
else
{
- fail_if((vex.opcx != vex_0f) || vex.reg ||
+ fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
vcpu_must_have_avx();
get_fpu(X86EMUL_FPU_ymm, &fic);
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Il 28/08/2013 10:32, Jan Beulich ha scritto:> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M > one as the second prefix byte > - early decoding normalized vex.reg, thus corrupting it for the main > consumer (copy_REX_VEX()), resulting in #UD on the two-operand > instructions we emulate > > Also add respective test cases to the testing utility plus > - fix get_fpu() (the fall-through order was inverted) > - add cpu_has_avx2, even if it''s currently unused (as in the new test > cases I decided to refrain from using AVX2 instructions in order to > be able to actually run all the tests on the hardware I have) > - slightly tweak cpu_has_avx to more consistently express the outputs > we don''t care about (sinking them all into the same variable)This patch include the solution for full SSE support needed to solve this problem? http://bugs.xenproject.org/xen/bug/11 Thanks for any reply.> > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/tools/tests/x86_emulator/test_x86_emulator.c > +++ b/tools/tests/x86_emulator/test_x86_emulator.c > @@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x > } > > #define cpu_has_avx ({ \ > - unsigned int eax = 1, ecx = 0, edx; \ > - cpuid(&eax, &edx, &ecx, &edx, NULL); \ > + unsigned int eax = 1, ecx = 0; \ > + cpuid(&eax, &eax, &ecx, &eax, NULL); \ > if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ > ecx = 0; \ > (ecx & (1U << 28)) != 0; \ > }) > > +#define cpu_has_avx2 ({ \ > + unsigned int eax = 1, ebx, ecx = 0; \ > + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ > + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ > + ebx = 0; \ > + else { \ > + eax = 7, ecx = 0; \ > + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ > + } \ > + (ebx & (1U << 5)) != 0; \ > +}) > + > int get_fpu( > void (*exception_callback)(void *, struct cpu_user_regs *), > void *exception_callback_arg, > @@ -111,14 +123,14 @@ int get_fpu( > { > case X86EMUL_FPU_fpu: > break; > - case X86EMUL_FPU_ymm: > - if ( cpu_has_avx ) > + case 
X86EMUL_FPU_mmx: > + if ( cpu_has_mmx ) > break; > case X86EMUL_FPU_xmm: > if ( cpu_has_sse ) > break; > - case X86EMUL_FPU_mmx: > - if ( cpu_has_mmx ) > + case X86EMUL_FPU_ymm: > + if ( cpu_has_avx ) > break; > default: > return X86EMUL_UNHANDLEABLE; > @@ -629,6 +641,73 @@ int main(int argc, char **argv) > else > printf("skipped\n"); > > + printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovdqu_to_mem[]; > + > + asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" > + ".pushsection .test, \"a\", @progbits\n" > + "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" > + ".popsection" :: "c" (NULL) ); > + > + memcpy(instr, vmovdqu_to_mem, 15); > + memset(res, 0x55, 128); > + memset(res + 16, 0xff, 16); > + memset(res + 20, 0x00, 16); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > + printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovdqu_from_mem[]; > + > +#if 0 /* Don''t use AVX2 instructions for now */ > + asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" > +#else > + asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" > + "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" > +#endif > + ".pushsection .test, \"a\", @progbits\n" > + "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" > + ".popsection" :: "d" (NULL) ); > + > + memcpy(instr, vmovdqu_from_mem, 15); > + memset(res + 4, 0xff, 16); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = 0; > + regs.edx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + if ( rc != X86EMUL_OKAY ) > + goto fail; > +#if 0 /* Don''t use AVX2 instructions for now */ > + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" > + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" > + "vpmovmskb %%ymm1, %0" : 
"=r" (rc) ); > +#else > + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" > + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" > + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" > + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" > + "vpmovmskb %%xmm0, %0\n\t" > + "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); > + rc |= i << 16; > +#endif > + if ( rc != 0xffffffff ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); > memset(res, 0x77, 64); > memset(res + 10, 0x66, 8); > @@ -683,6 +762,59 @@ int main(int argc, char **argv) > else > printf("skipped\n"); > > + printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); > + memset(res, 0x88, 64); > + memset(res + 10, 0x77, 8); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovsd_to_mem[]; > + > + asm volatile ( "vbroadcastsd %0, %%ymm5\n" > + ".pushsection .test, \"a\", @progbits\n" > + "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" > + ".popsection" :: "m" (res[10]), "c" (NULL) ); > + > + memcpy(instr, vmovsd_to_mem, 15); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = (unsigned long)(res + 2); > + regs.edx = 0; > + rc = x86_emulate(&ctxt, &emulops); > + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) > + goto fail; > + printf("okay\n"); > + } > + else > + { > + printf("skipped\n"); > + memset(res + 2, 0x77, 8); > + } > + > + printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); > + if ( stack_exec && cpu_has_avx ) > + { > + extern const unsigned char vmovaps_from_mem[]; > + > + asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" > + ".pushsection .test, \"a\", @progbits\n" > + "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" > + ".popsection" :: "d" (NULL) ); > + > + memcpy(instr, vmovaps_from_mem, 15); > + regs.eip = (unsigned long)&instr[0]; > + regs.ecx = 0; > + regs.edx = (unsigned long)res; > + rc = x86_emulate(&ctxt, &emulops); > + if ( rc != X86EMUL_OKAY ) > + goto fail; > + asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" > + "vmovmskps %%ymm0, %0" : 
"=r" (rc) : "m" (res[8]) ); > + if ( rc != 0xff ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > for ( j = 1; j <= 2; j++ ) > { > #if defined(__i386__) > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -1454,10 +1454,10 @@ x86_emulate( > /* VEX */ > generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); > > - vex.raw[0] = b; > + vex.raw[0] = modrm; > if ( b & 1 ) > { > - vex.raw[1] = b; > + vex.raw[1] = modrm; > vex.opcx = vex_0f; > vex.x = 1; > vex.b = 1; > @@ -1479,10 +1479,7 @@ x86_emulate( > } > } > } > - vex.reg ^= 0xf; > - if ( !mode_64bit() ) > - vex.reg &= 0x7; > - else if ( !vex.r ) > + if ( mode_64bit() && !vex.r ) > rex_prefix |= REX_R; > > fail_if(vex.opcx != vex_0f); > @@ -3899,8 +3896,9 @@ x86_emulate( > else > { > fail_if((vex.opcx != vex_0f) || > - (vex.reg && ((ea.type == OP_MEM) || > - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); > + ((vex.reg != 0xf) && > + ((ea.type == OP_MEM) || > + !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); > vcpu_must_have_avx(); > get_fpu(X86EMUL_FPU_ymm, &fic); > ea.bytes = 16 << vex.l; > @@ -4168,7 +4166,7 @@ x86_emulate( > } > else > { > - fail_if((vex.opcx != vex_0f) || vex.reg || > + fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || > ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); > vcpu_must_have_avx(); > get_fpu(X86EMUL_FPU_ymm, &fic); > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 28/08/13 09:49, Fabio Fantoni wrote:> Il 28/08/2013 10:32, Jan Beulich ha scritto: >> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M >> one as the second prefix byte >> - early decoding normalized vex.reg, thus corrupting it for the main >> consumer (copy_REX_VEX()), resulting in #UD on the two-operand >> instructions we emulate >> >> Also add respective test cases to the testing utility plus >> - fix get_fpu() (the fall-through order was inverted) >> - add cpu_has_avx2, even if it''s currently unused (as in the new test >> cases I decided to refrain from using AVX2 instructions in order to >> be able to actually run all the tests on the hardware I have) >> - slightly tweak cpu_has_avx to more consistently express the outputs >> we don''t care about (sinking them all into the same variable) > > This patch include the solution for full SSE support needed to solve > this problem? > http://bugs.xenproject.org/xen/bug/11 > > Thanks for any reply.No it doesn''t. Bug #11 is a problem with the communication between Xen and Qemu. This fixes some bugs with Xens emulation of instructions, irrespective of Qemu. 
~Andrew> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> >> --- a/tools/tests/x86_emulator/test_x86_emulator.c >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c >> @@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x >> } >> >> #define cpu_has_avx ({ \ >> - unsigned int eax = 1, ecx = 0, edx; \ >> - cpuid(&eax, &edx, &ecx, &edx, NULL); \ >> + unsigned int eax = 1, ecx = 0; \ >> + cpuid(&eax, &eax, &ecx, &eax, NULL); \ >> if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> ecx = 0; \ >> (ecx & (1U << 28)) != 0; \ >> }) >> >> +#define cpu_has_avx2 ({ \ >> + unsigned int eax = 1, ebx, ecx = 0; \ >> + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ >> + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> + ebx = 0; \ >> + else { \ >> + eax = 7, ecx = 0; \ >> + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ >> + } \ >> + (ebx & (1U << 5)) != 0; \ >> +}) >> + >> int get_fpu( >> void (*exception_callback)(void *, struct cpu_user_regs *), >> void *exception_callback_arg, >> @@ -111,14 +123,14 @@ int get_fpu( >> { >> case X86EMUL_FPU_fpu: >> break; >> - case X86EMUL_FPU_ymm: >> - if ( cpu_has_avx ) >> + case X86EMUL_FPU_mmx: >> + if ( cpu_has_mmx ) >> break; >> case X86EMUL_FPU_xmm: >> if ( cpu_has_sse ) >> break; >> - case X86EMUL_FPU_mmx: >> - if ( cpu_has_mmx ) >> + case X86EMUL_FPU_ymm: >> + if ( cpu_has_avx ) >> break; >> default: >> return X86EMUL_UNHANDLEABLE; >> @@ -629,6 +641,73 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovdqu_to_mem[]; >> + >> + asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" >> + ".popsection" :: "c" (NULL) ); >> + >> + memcpy(instr, vmovdqu_to_mem, 15); >> + memset(res, 0x55, 128); >> + memset(res + 16, 0xff, 16); >> + memset(res + 20, 0x00, 16); >> + regs.eip = (unsigned 
long)&instr[0]; >> + regs.ecx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> + printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovdqu_from_mem[]; >> + >> +#if 0 /* Don''t use AVX2 instructions for now */ >> + asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" >> +#else >> + asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" >> + "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" >> +#endif >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" >> + ".popsection" :: "d" (NULL) ); >> + >> + memcpy(instr, vmovdqu_from_mem, 15); >> + memset(res + 4, 0xff, 16); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = 0; >> + regs.edx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY ) >> + goto fail; >> +#if 0 /* Don''t use AVX2 instructions for now */ >> + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" >> + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" >> + "vpmovmskb %%ymm1, %0" : "=r" (rc) ); >> +#else >> + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" >> + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" >> + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" >> + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" >> + "vpmovmskb %%xmm0, %0\n\t" >> + "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); >> + rc |= i << 16; >> +#endif >> + if ( rc != 0xffffffff ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); >> memset(res, 0x77, 64); >> memset(res + 10, 0x66, 8); >> @@ -683,6 +762,59 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); >> + memset(res, 0x88, 64); >> + memset(res + 10, 0x77, 8); >> + if ( stack_exec && cpu_has_avx ) >> + { 
>> + extern const unsigned char vmovsd_to_mem[]; >> + >> + asm volatile ( "vbroadcastsd %0, %%ymm5\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" >> + ".popsection" :: "m" (res[10]), "c" (NULL) ); >> + >> + memcpy(instr, vmovsd_to_mem, 15); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = (unsigned long)(res + 2); >> + regs.edx = 0; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + { >> + printf("skipped\n"); >> + memset(res + 2, 0x77, 8); >> + } >> + >> + printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovaps_from_mem[]; >> + >> + asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" >> + ".popsection" :: "d" (NULL) ); >> + >> + memcpy(instr, vmovaps_from_mem, 15); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = 0; >> + regs.edx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY ) >> + goto fail; >> + asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" >> + "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); >> + if ( rc != 0xff ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> for ( j = 1; j <= 2; j++ ) >> { >> #if defined(__i386__) >> --- a/xen/arch/x86/x86_emulate/x86_emulate.c >> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c >> @@ -1454,10 +1454,10 @@ x86_emulate( >> /* VEX */ >> generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); >> >> - vex.raw[0] = b; >> + vex.raw[0] = modrm; >> if ( b & 1 ) >> { >> - vex.raw[1] = b; >> + vex.raw[1] = modrm; >> vex.opcx = vex_0f; >> vex.x = 1; >> vex.b = 1; >> @@ -1479,10 +1479,7 @@ x86_emulate( >> } >> } >> } >> - vex.reg ^= 0xf; >> - if ( !mode_64bit() ) >> - vex.reg &= 0x7; >> - else if ( !vex.r ) >> + if 
( mode_64bit() && !vex.r ) >> rex_prefix |= REX_R; >> >> fail_if(vex.opcx != vex_0f); >> @@ -3899,8 +3896,9 @@ x86_emulate( >> else >> { >> fail_if((vex.opcx != vex_0f) || >> - (vex.reg && ((ea.type == OP_MEM) || >> - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); >> + ((vex.reg != 0xf) && >> + ((ea.type == OP_MEM) || >> + !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); >> vcpu_must_have_avx(); >> get_fpu(X86EMUL_FPU_ymm, &fic); >> ea.bytes = 16 << vex.l; >> @@ -4168,7 +4166,7 @@ x86_emulate( >> } >> else >> { >> - fail_if((vex.opcx != vex_0f) || vex.reg || >> + fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || >> ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); >> vcpu_must_have_avx(); >> get_fpu(X86EMUL_FPU_ymm, &fic); >> >> >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 28/08/2013 09:32, "Jan Beulich" <JBeulich@suse.com> wrote:> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M > one as the second prefix byte > - early decoding normalized vex.reg, thus corrupting it for the main > consumer (copy_REX_VEX()), resulting in #UD on the two-operand > instructions we emulate > > Also add respective test cases to the testing utility plus > - fix get_fpu() (the fall-through order was inverted) > - add cpu_has_avx2, even if it''s currently unused (as in the new test > cases I decided to refrain from using AVX2 instructions in order to > be able to actually run all the tests on the hardware I have) > - slightly tweak cpu_has_avx to more consistently express the outputs > we don''t care about (sinking them all into the same variable) > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Acked-by: Keir Fraser <keir@xen.org>