On Fri, 2012-10-19 at 16:05 +0100, Jan Beulich wrote:
> >>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> > Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD"):
> >> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com> wrote:
> >> > Ping?
> >>
> >> I'm afraid it's not really clear who should commit this - it's tools
> >> side code, so IanJ or IanC would normally be the ones, but otoh
> >> it's code requiring low level hardware knowledge to review the
> >> patch, so both of them might want to rather not do the review.
> >> In the past it was usually Keir who eventually committed such
> >> patches, but I don't know whether he put this on his to-look-at-
> >> and-eventually-commit list.
> >
> > My view is that I would like an ack from someone who understands
> > what's going on ...
>
> Which would ideally be those who introduced the code, i.e.
> Intel folks if I'm not mistaken...
Let's CC some of them then.
Intel folks -- any opinion on the patch below from Christoph?
8<----------------
# User Christoph Egger
# Date 1349437062 -7200
xen mceinj: support AMD.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
diff -r 21704bc429b4 -r 1a3eea784e09 tools/tests/mce-test/tools/xen-mceinj.c
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -1,6 +1,7 @@
/*
* xen-mceinj.c: utilities to inject fake MCE for x86.
* Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2012, AMD Cooperation Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -18,6 +19,7 @@
* Authors: Yunhong Jiang <yunhong.jiang@intel.com>
* Haicheng Li <haicheng.li@intel.com>
* Xudong Hao <xudong.hao@intel.com>
+ * Christoph Egger <Christoph.Egger@amd.com>
*/
@@ -44,11 +46,14 @@
#define MCi_type_STATUS 0x1
#define MCi_type_ADDR 0x2
#define MCi_type_MISC 0x3
-#define MCi_type_CTL2 0x4
+#define MC4_type_MISC1 0x4
+#define MC4_type_MISC2 0x5
+#define MC4_type_MISC3 0x6
+#define MCi_type_CTL2 0x7
#define INVALID_MSR ~0UL
-/* Intel MSRs */
+/* X86 machine check MSRs */
#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
@@ -56,35 +61,66 @@
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
+
+/* Intel MSRs */
#define MSR_IA32_MC0_CTL2 0x00000280
-/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
+/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
#define MCG_STATUS_SRAO_LLC_VAL 0x5
#define MCE_SRAO_LLC_BANK 0x7
#define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL
#define MCi_MISC_SRAO_LLC_VAL 0x86UL
-/* Memory Patrol Scrub SRAO MCE */
+/* Intel: Memory Patrol Scrub SRAO MCE */
#define MCG_STATUS_SRAO_MEM_VAL 0x5
#define MCE_SRAO_MEM_BANK 0x8
#define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL
#define MCi_MISC_SRAO_MEM_VAL 0x86UL
-/* LLC EWB UCNA Error */
+/* Intel: LLC EWB UCNA Error */
#define MCG_STATUS_UCNA_LLC_VAL 0x0
#define CMCI_UCNA_LLC_BANK 0x9
#define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL
#define MCi_MISC_UCNA_LLC_VAL 0x86UL
-/* Error Types */
-#define MCE_SRAO_MEM 0x0
-#define MCE_SRAO_LLC 0x1
-#define CMCI_UCNA_LLC 0x2
+/* Intel: Error Types */
+#define INTEL_MCE_SRAO_MEM 0x0
+#define INTEL_MCE_SRAO_LLC 0x1
+#define INTEL_CMCI_UCNA_LLC 0x2
+
+/* AMD: Memory Error */
+#define MCG_STATUS_MEM_VAL 0x5
+#define MCE_MEM_BANK 0x4
+#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL
+//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL
+#define MCi_MISC_MEM_VAL 0x0
+
+/* AMD: L3 Cache Error */
+#define MCG_STATUS_L3_VAL 0x5
+#define MCE_L3_BANK 0x4
+#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL
+#define MC4_MISC0_VAL 0x0
+#define MC4_MISC1_VAL 0x0
+#define MC4_MISC2_L3_VAL 0xc008000000000003ULL
+
+/* AMD: CPU corruption error */
+#define MCG_STATUS_CPU_VAL 0x5
+#define MCE_CPU_BANK 0x2
+#define MCi_STATUS_CPU_VAL 0x9200000000000000ULL
+//#define MCi_STATUS_CPU_VAL 0xb200000000000000ULL
+
+/* AMD: Error Types */
+#define AMD_MCE_MEM 0x20 /* memory error */
+#define AMD_MCE_L3 0x21 /* l3 cache */
#define LOGFILE stdout
int dump;
+int opt_exception;
struct xen_mc_msrinject msr_inj;
+int cpu_is_amd;
+int cpu_is_intel;
+
static void Lprintf(const char *fmt, ...)
{
@@ -145,7 +181,7 @@ static int mca_cpuinfo(xc_interface *xc_
return 0;
}
-static int inject_cmci(xc_interface *xc_handle, int cpu_nr)
+static int intel_inject_cmci(xc_interface *xc_handle)
{
struct xen_mc mc;
int nr_cpus;
@@ -191,6 +227,15 @@ static uint64_t bank_addr(int bank, int
case MCi_type_MISC:
addr = MSR_IA32_MC0_CTL + (bank * 4) + type;
break;
+ case MC4_type_MISC1:
+ addr = 0xc0000408;
+ break;
+ case MC4_type_MISC2:
+ addr = 0xc0000409;
+ break;
+ case MC4_type_MISC3:
+ addr = 0xc000040a;
+ break;
case MCi_type_CTL2:
addr = MSR_IA32_MC0_CTL2 + bank;
break;
@@ -356,12 +401,11 @@ static int inject_mci_status(xc_interfac
}
static int inject_mci_misc(xc_interface *xc_handle,
- uint32_t cpu_nr,
- uint64_t bank,
- uint64_t val)
+ uint32_t cpu_nr, uint32_t misctype,
+ uint64_t bank, uint64_t val)
{
return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE,
- MCi_type_MISC, bank, val);
+ MCi_type_MISC + misctype, bank, val);
}
static int inject_mci_addr(xc_interface *xc_handle,
@@ -373,10 +417,8 @@ static int inject_mci_addr(xc_interface
MCi_type_ADDR, bank, val);
}
-static int inject_llc_srao(xc_interface *xc_handle,
- uint32_t cpu_nr,
- uint32_t domain,
- uint64_t gaddr)
+static int intel_inject_llc_srao(xc_interface *xc_handle,
+ uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
{
uint64_t gpfn, mfn, haddr;
int ret = 0;
@@ -390,7 +432,7 @@ static int inject_llc_srao(xc_interface
if ( ret )
err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
- ret = inject_mci_misc(xc_handle, cpu_nr,
+ ret = inject_mci_misc(xc_handle, cpu_nr, 0,
MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL);
if ( ret )
err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -407,17 +449,17 @@ static int inject_llc_srao(xc_interface
ret = flush_msr_inj(xc_handle);
if ( ret )
err(xc_handle, "Failed to inject MSR\n");
- ret = inject_mce(xc_handle, cpu_nr);
- if ( ret )
- err(xc_handle, "Failed to inject MCE error\n");
+ if (opt_exception) {
+ ret = inject_mce(xc_handle, cpu_nr);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCE error\n");
+ }
return 0;
}
-static int inject_mem_srao(xc_interface *xc_handle,
- uint32_t cpu_nr,
- uint32_t domain,
- uint64_t gaddr)
+static int intel_inject_mem_srao(xc_interface *xc_handle,
+ uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
{
uint64_t gpfn, mfn, haddr;
int ret = 0;
@@ -431,7 +473,7 @@ static int inject_mem_srao(xc_interface
if ( ret )
err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
- ret = inject_mci_misc(xc_handle, cpu_nr,
+ ret = inject_mci_misc(xc_handle, cpu_nr, 0,
MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL);
if ( ret )
err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -448,17 +490,17 @@ static int inject_mem_srao(xc_interface
ret = flush_msr_inj(xc_handle);
if ( ret )
err(xc_handle, "Failed to inject MSR\n");
- ret = inject_mce(xc_handle, cpu_nr);
- if ( ret )
- err(xc_handle, "Failed to inject MCE error\n");
+ if (opt_exception) {
+ ret = inject_mce(xc_handle, cpu_nr);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCE error\n");
+ }
return 0;
}
-static int inject_llc_ucna(xc_interface *xc_handle,
- uint32_t cpu_nr,
- uint32_t domain,
- uint64_t gaddr)
+static int intel_inject_llc_ucna(xc_interface *xc_handle,
+ uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
{
uint64_t gpfn, mfn, haddr;
int ret = 0;
@@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface
if ( ret )
err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
- ret = inject_mci_misc(xc_handle, cpu_nr,
+ ret = inject_mci_misc(xc_handle, cpu_nr, 0,
CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL);
if ( ret )
err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface
ret = flush_msr_inj(xc_handle);
if ( ret )
err(xc_handle, "Failed to inject MSR\n");
- ret = inject_cmci(xc_handle, cpu_nr);
+ ret = intel_inject_cmci(xc_handle);
if ( ret )
err(xc_handle, "Failed to inject MCE error\n");
return 0;
}
+static int amd_inject_mem(xc_interface *xc_handle,
+ uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+ uint64_t gpfn, mfn, haddr;
+ int ret = 0;
+
+ ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+ ret = inject_mci_status(xc_handle, cpu_nr,
+ MCE_MEM_BANK, MCi_STATUS_MEM_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+ ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+ MCE_MEM_BANK, MCi_MISC_MEM_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCi_MISC MSR\n");
+
+ gpfn = gaddr >> PAGE_SHIFT;
+ mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+ if (!mfn_valid(mfn))
+ err(xc_handle, "The MFN is not valid\n");
+ haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+ ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+ ret = flush_msr_inj(xc_handle);
+ if ( ret )
+ err(xc_handle, "Failed to inject MSR\n");
+
+ if (opt_exception) {
+ ret = inject_mce(xc_handle, cpu_nr);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCE error\n");
+ }
+
+ return 0;
+}
+
+static int amd_inject_l3(xc_interface *xc_handle,
+ uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+ uint64_t gpfn, mfn, haddr;
+ int ret = 0;
+
+ ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+ ret = inject_mci_status(xc_handle, cpu_nr,
+ MCE_L3_BANK, MCi_STATUS_L3_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+ ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+ MCE_L3_BANK, MC4_MISC0_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MC4_MISC0 MSR\n");
+
+ ret = inject_mci_misc(xc_handle, cpu_nr, 1,
+ MCE_L3_BANK, MC4_MISC1_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MC4_MISC1 MSR\n");
+
+ ret = inject_mci_misc(xc_handle, cpu_nr, 2,
+ MCE_L3_BANK, MC4_MISC2_L3_VAL);
+ if ( ret )
+ err(xc_handle, "Failed to inject MC4_MISC2 MSR\n");
+
+ gpfn = gaddr >> PAGE_SHIFT;
+ mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+ if (!mfn_valid(mfn))
+ err(xc_handle, "The MFN is not valid\n");
+ haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+ ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+ ret = flush_msr_inj(xc_handle);
+ if ( ret )
+ err(xc_handle, "Failed to inject MSR\n");
+
+ if (opt_exception) {
+ ret = inject_mce(xc_handle, cpu_nr);
+ if ( ret )
+ err(xc_handle, "Failed to inject MCE error\n");
+ }
+
+ return 0;
+}
+
+
static long xs_get_dom_mem(int domid)
{
char path[128];
@@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid)
if (!xs)
return -1;
- sprintf(path, "/local/domain/%d/memory/target", domid);
+ snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
memstr = xs_read(xs, XBT_NULL, path, &plen);
xs_daemon_close(xs);
@@ -540,30 +677,80 @@ static void help(void)
" -D, --dump dump addr info without error injection\n"
" -c, --cpu=CPU_ID target CPU\n"
" -d, --domain=DomID target domain, the default is Xen itself\n"
+ " -e raise MCE exception\n"
" -h, --help print this page\n"
" -p, --phyaddr physical address\n"
" -t, --type=error error type\n"
- " 0 : MCE_SRAO_MEM\n"
- " 1 : MCE_SRAO_LLC\n"
- " 2 : CMCI_UCNA_LLC\n"
+ " 0x0 : MCE_SRAO_MEM (Intel only)\n"
+ " 0x1 : MCE_SRAO_LLC (Intel only)\n"
+ " 0x2 : CMCI_UCNA_LLC (Intel only)\n"
+ " 0x20: DRAM error (AMD only)\n"
+ " 0x21: L3 cache error (AMD only)\n"
"\n"
);
}
+static void cpuid(const unsigned int *input, unsigned int *regs)
+{
+ unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+ asm (
+#ifdef __i386__
+ "push %%ebx; push %%edx\n\t"
+#else
+ "push %%rbx; push %%rdx\n\t"
+#endif
+ "cpuid\n\t"
+ "mov %%ebx,4(%4)\n\t"
+ "mov %%edx,12(%4)\n\t"
+#ifdef __i386__
+ "pop %%edx; pop %%ebx\n\t"
+#else
+ "pop %%rdx; pop %%rbx\n\t"
+#endif
+ : "=a" (regs[0]), "=c" (regs[2])
+ : "0" (input[0]), "1" (count), "S" (regs)
+ : "memory" );
+}
+
+/* Get the manufacturer brand name of the host processor. */
+static void cpuid_brand_get(char *str)
+{
+ unsigned int input[2] = { 0, 0 };
+ unsigned int regs[4];
+
+ cpuid(input, regs);
+
+ *(uint32_t *)(str + 0) = regs[1];
+ *(uint32_t *)(str + 4) = regs[3];
+ *(uint32_t *)(str + 8) = regs[2];
+ str[12] = '\0';
+}
+
int main(int argc, char *argv[])
{
- int type = MCE_SRAO_MEM;
+ int type;
int c, opt_index;
uint32_t domid;
xc_interface *xc_handle;
- int cpu_nr;
- int64_t gaddr, gpfn, mfn, haddr, max_gpa;
+ unsigned int cpu_nr;
+ uint64_t gaddr, gpfn, mfn, haddr, max_gpa;
+ char cpu_brand[13];
/* Default Value */
domid = DOMID_XEN;
gaddr = 0x180020;
cpu_nr = 0;
+ cpu_is_amd = cpu_is_intel = 0;
+ cpuid_brand_get(cpu_brand);
+ if (strstr(cpu_brand, "AMD"))
+ cpu_is_amd = 1;
+ else
+ cpu_is_intel = 1;
+
+ if (cpu_is_intel)
+ type = INTEL_MCE_SRAO_MEM;
+
init_msr_inj();
xc_handle = xc_interface_open(0, 0, 0);
if ( !xc_handle ) {
@@ -571,8 +758,8 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
- while ( 1 ) {
- c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index);
+ for (;;) {
+ c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, &opt_index);
if ( c == -1 )
break;
switch ( c ) {
@@ -580,23 +767,26 @@ int main(int argc, char *argv[])
dump=1;
break;
case 'c':
- cpu_nr = strtol(optarg, &optarg, 10);
+ cpu_nr = strtoul(optarg, &optarg, 0);
if ( strlen(optarg) != 0 )
err(xc_handle, "Please input a digit parameter for CPU\n");
break;
case 'd':
- domid = strtol(optarg, &optarg, 10);
+ domid = strtoul(optarg, &optarg, 0);
if ( strlen(optarg) != 0 )
err(xc_handle, "Please input a digit parameter for domain\n");
break;
case 'p':
- gaddr = strtol(optarg, &optarg, 0);
+ gaddr = strtoul(optarg, &optarg, 0);
if ( strlen(optarg) != 0 )
err(xc_handle, "Please input correct page address\n");
break;
case 't':
type = strtol(optarg, NULL, 0);
break;
+ case 'e':
+ opt_exception = 1;
+ break;
case 'h':
default:
help();
@@ -627,16 +817,26 @@ int main(int argc, char *argv[])
goto out;
}
- switch ( type )
- {
- case MCE_SRAO_MEM:
- inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
+ switch ( type ) {
+ case INTEL_MCE_SRAO_MEM:
+ if ( cpu_is_intel )
+ intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
break;
- case MCE_SRAO_LLC:
- inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
+ case INTEL_MCE_SRAO_LLC:
+ if ( cpu_is_intel )
+ intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
break;
- case CMCI_UCNA_LLC:
- inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+ case INTEL_CMCI_UCNA_LLC:
+ if ( cpu_is_intel )
+ intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+ break;
+ case AMD_MCE_MEM:
+ if ( cpu_is_amd )
+ amd_inject_mem(xc_handle, cpu_nr, domid, gaddr);
+ break;
+ case AMD_MCE_L3:
+ if ( cpu_is_amd )
+ amd_inject_l3(xc_handle, cpu_nr, domid, gaddr);
break;
default:
err(xc_handle, "Unsupported error type\n");