a8206654c64f:patches chegger$ cat xen_mceinj.diff commit a277555e158c87aed34196f72eba0a4cf8f0fb38 Author: Christoph Egger <chegger@amazon.de> Date: Wed Feb 27 14:52:19 2013 +0000 xen-mceinj: Support AMD. Add -e option. Add support for AMD. Add -e option to raise an exception. Signed-off-by: Christoph Egger <chegger@amazon.de> diff --git a/tools/tests/mce-test/tools/xen-mceinj.c b/tools/tests/mce-test/tools/xen-mceinj.c index e3e62f7..7400a02 100644 --- a/tools/tests/mce-test/tools/xen-mceinj.c +++ b/tools/tests/mce-test/tools/xen-mceinj.c @@ -1,6 +1,8 @@ /* * xen-mceinj.c: utilities to inject fake MCE for x86. * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2012, AMD Cooperation Inc. + * Copyright (c) 2013, Amazon.com, Inc. or its affiliates. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -18,6 +20,7 @@ * Authors: Yunhong Jiang <yunhong.jiang@intel.com> * Haicheng Li <haicheng.li@intel.com> * Xudong Hao <xudong.hao@intel.com> + * Christoph Egger <chegger@amazon.de> */ @@ -44,11 +47,14 @@ #define MCi_type_STATUS 0x1 #define MCi_type_ADDR 0x2 #define MCi_type_MISC 0x3 -#define MCi_type_CTL2 0x4 +#define MC4_type_MISC1 0x4 +#define MC4_type_MISC2 0x5 +#define MC4_type_MISC3 0x6 +#define MCi_type_CTL2 0x7 #define INVALID_MSR ~0UL -/* Intel MSRs */ +/* X86 machine check MSRs */ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b @@ -56,35 +62,63 @@ #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 + +/* Intel MSRs */ #define MSR_IA32_MC0_CTL2 0x00000280 -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 #define MCE_SRAO_LLC_BANK 0x7 #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL #define MCi_MISC_SRAO_LLC_VAL 0x86UL 
-/* Memory Patrol Scrub SRAO MCE */ +/* Intel: Memory Patrol Scrub SRAO MCE */ #define MCG_STATUS_SRAO_MEM_VAL 0x5 #define MCE_SRAO_MEM_BANK 0x8 #define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL #define MCi_MISC_SRAO_MEM_VAL 0x86UL -/* LLC EWB UCNA Error */ +/* Intel: LLC EWB UCNA Error */ #define MCG_STATUS_UCNA_LLC_VAL 0x0 #define CMCI_UCNA_LLC_BANK 0x9 #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL #define MCi_MISC_UCNA_LLC_VAL 0x86UL -/* Error Types */ -#define MCE_SRAO_MEM 0x0 -#define MCE_SRAO_LLC 0x1 -#define CMCI_UCNA_LLC 0x2 +/* Intel: Error Types */ +#define INTEL_MCE_SRAO_MEM 0x0 +#define INTEL_MCE_SRAO_LLC 0x1 +#define INTEL_CMCI_UCNA_LLC 0x2 + +/* AMD: Memory Error */ +#define MCG_STATUS_MEM_VAL 0x5 +#define MCE_MEM_BANK 0x4 +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL +#define MCi_MISC_MEM_VAL 0x0 + +/* AMD: L3 Cache Error */ +#define MCG_STATUS_L3_VAL 0x5 +#define MCE_L3_BANK 0x4 +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL +#define MC4_MISC0_VAL 0x0 +#define MC4_MISC1_VAL 0x0 +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL + +/* AMD: Error Types */ +#define AMD_MCE_MEM 0x0 /* memory error */ +#define AMD_MCE_L3 0x1 /* l3 cache */ #define LOGFILE stdout -int dump; -struct xen_mc_msrinject msr_inj; +static int dump; +static int opt_exception; +static struct xen_mc_msrinject msr_inj; + +#define CPU_VENDOR_UNKNOWN -1 +#define CPU_VENDOR_AMD 0 +#define CPU_VENDOR_INTEL 1 +static int cpu_vendor; + static void Lprintf(const char *fmt, ...) 
{ @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle) return 0; } -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) +static int intel_inject_cmci(xc_interface *xc_handle) { struct xen_mc mc; int nr_cpus; @@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type) case MCi_type_MISC: addr = MSR_IA32_MC0_CTL + (bank * 4) + type; break; + case MC4_type_MISC1: + addr = 0xc0000408; + break; + case MC4_type_MISC2: + addr = 0xc0000409; + break; + case MC4_type_MISC3: + addr = 0xc000040a; + break; case MCi_type_CTL2: addr = MSR_IA32_MC0_CTL2 + bank; break; @@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface *xc_handle, } static int inject_mci_misc(xc_interface *xc_handle, - uint32_t cpu_nr, - uint64_t bank, - uint64_t val) + uint32_t cpu_nr, uint32_t misctype, + uint64_t bank, uint64_t val) { return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE, - MCi_type_MISC, bank, val); + MCi_type_MISC + misctype, bank, val); } static int inject_mci_addr(xc_interface *xc_handle, @@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface *xc_handle, MCi_type_ADDR, bank, val); } -static int inject_llc_srao(xc_interface *xc_handle, - uint32_t cpu_nr, - uint32_t domain, - uint64_t gaddr) +static int intel_inject_llc_srao(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { uint64_t gpfn, mfn, haddr; int ret = 0; @@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface *xc_handle, if ( ret ) err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); - ret = inject_mci_misc(xc_handle, cpu_nr, + ret = inject_mci_misc(xc_handle, cpu_nr, 0, MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL); if ( ret ) err(xc_handle, "Failed to inject MCi_MISC MSR\n"); @@ -407,17 +447,18 @@ static int inject_llc_srao(xc_interface *xc_handle, ret = flush_msr_inj(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MSR\n"); - ret = inject_mce(xc_handle, cpu_nr); - if ( ret ) - err(xc_handle, "Failed to inject MCE error\n"); + + if 
(opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } return 0; } -static int inject_mem_srao(xc_interface *xc_handle, - uint32_t cpu_nr, - uint32_t domain, - uint64_t gaddr) +static int intel_inject_mem_srao(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { uint64_t gpfn, mfn, haddr; int ret = 0; @@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface *xc_handle, if ( ret ) err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); - ret = inject_mci_misc(xc_handle, cpu_nr, + ret = inject_mci_misc(xc_handle, cpu_nr, 0, MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL); if ( ret ) err(xc_handle, "Failed to inject MCi_MISC MSR\n"); @@ -448,17 +489,18 @@ static int inject_mem_srao(xc_interface *xc_handle, ret = flush_msr_inj(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MSR\n"); - ret = inject_mce(xc_handle, cpu_nr); - if ( ret ) - err(xc_handle, "Failed to inject MCE error\n"); + + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } return 0; } -static int inject_llc_ucna(xc_interface *xc_handle, - uint32_t cpu_nr, - uint32_t domain, - uint64_t gaddr) +static int intel_inject_llc_ucna(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { uint64_t gpfn, mfn, haddr; int ret = 0; @@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface *xc_handle, if ( ret ) err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); - ret = inject_mci_misc(xc_handle, cpu_nr, + ret = inject_mci_misc(xc_handle, cpu_nr, 0, CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL); if ( ret ) err(xc_handle, "Failed to inject MCi_MISC MSR\n"); @@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface *xc_handle, ret = flush_msr_inj(xc_handle); if ( ret ) err(xc_handle, "Failed to inject MSR\n"); - ret = inject_cmci(xc_handle, cpu_nr); + ret = intel_inject_cmci(xc_handle); if ( ret ) err(xc_handle, "Failed to 
inject MCE error\n"); return 0; } +static int amd_inject_mem(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ + uint64_t gpfn, mfn, haddr; + int ret = 0; + + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); + + ret = inject_mci_status(xc_handle, cpu_nr, + MCE_MEM_BANK, MCi_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 0, + MCE_MEM_BANK, MCi_MISC_MEM_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCi_MISC MSR\n"); + + gpfn = gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr); + if ( ret ) + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); + + ret = flush_msr_inj(xc_handle); + if ( ret ) + err(xc_handle, "Failed to inject MSR\n"); + + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } + + return 0; +} + +static int amd_inject_l3(xc_interface *xc_handle, + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ + uint64_t gpfn, mfn, haddr; + int ret = 0; + + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); + + ret = inject_mci_status(xc_handle, cpu_nr, + MCE_L3_BANK, MCi_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 0, + MCE_L3_BANK, MC4_MISC0_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 1, + MCE_L3_BANK, MC4_MISC1_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); + + ret = inject_mci_misc(xc_handle, cpu_nr, 2, + MCE_L3_BANK, 
MC4_MISC2_L3_VAL); + if ( ret ) + err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); + + gpfn = gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr); + if ( ret ) + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); + + ret = flush_msr_inj(xc_handle); + if ( ret ) + err(xc_handle, "Failed to inject MSR\n"); + + if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); + if ( ret ) + err(xc_handle, "Failed to inject MCE error\n"); + } + + return 0; +} + + static long xs_get_dom_mem(int domid) { char path[128]; @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) if (!xs) return -1; - sprintf(path, "/local/domain/%d/memory/target", domid); + snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid); memstr = xs_read(xs, XBT_NULL, path, &plen); xs_daemon_close(xs); @@ -540,30 +677,109 @@ static void help(void) " -D, --dump dump addr info without error injection\n" " -c, --cpu=CPU_ID target CPU\n" " -d, --domain=DomID target domain, the default is Xen itself\n" + " -e raise MCE exception\n" " -h, --help print this page\n" " -p, --phyaddr physical address\n" - " -t, --type=error error type\n" - " 0 : MCE_SRAO_MEM\n" - " 1 : MCE_SRAO_LLC\n" - " 2 : CMCI_UCNA_LLC\n" - "\n" ); + + if (cpu_vendor == CPU_VENDOR_INTEL) { + printf( + " -t, --type=error error type\n" + " 0x0 : SRAO MEM\n" + " 0x1 : SRAO LLC\n" + " 0x2 : CMCI UCNA LLC\n"); + } + if (cpu_vendor == CPU_VENDOR_AMD) { + printf( + " -t, --type=error error type\n" + " 0x0: DRAM error\n" + " 0x1: L3 cache error\n"); + } + printf("\n"); +} + +static void cpuid(const unsigned int *input, unsigned int regs[4]) +{ + unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 
0 : input[1]; +#ifdef __i386__ + /* Use the stack to avoid reg constraint failures with some gcc flags */ + asm ( + "push %%ebx; push %%edx\n\t" + "cpuid\n\t" + "mov %%ebx,4(%4)\n\t" + "mov %%edx,12(%4)\n\t" + "pop %%edx; pop %%ebx\n\t" + : "=a" (regs[0]), "=c" (regs[2]) + : "0" (input[0]), "1" (count), "S" (regs) + : "memory" ); +#else + asm ( + "cpuid" + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3]) + : "0" (input[0]), "2" (count) ); +#endif +} + +/* Get the manufacturer brand name of the host processor. */ +static void cpuid_brand_get(char *str, size_t len) +{ + unsigned int input[2] = { 0, 0 }; + union { + unsigned int regs[4]; + struct { + char eax[4]; + char ebx[4]; + char ecx[4]; + char edx[4]; + } str_regs; + } cpu_branding; + + cpuid(input, cpu_branding.regs); + + snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c", + cpu_branding.str_regs.ebx[0], cpu_branding.str_regs.ebx[1], + cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3], + cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1], + cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3], + cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1], + cpu_branding.str_regs.ecx[2], cpu_branding.str_regs.ecx[3]); } int main(int argc, char *argv[]) { - int type = MCE_SRAO_MEM; + int type; int c, opt_index; uint32_t domid; xc_interface *xc_handle; - int cpu_nr; - int64_t gaddr, gpfn, mfn, haddr, max_gpa; + unsigned int cpu_nr; + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; + char cpu_brand[13]; /* Default Value */ domid = DOMID_XEN; gaddr = 0x180020; cpu_nr = 0; + cpu_vendor = CPU_VENDOR_UNKNOWN; + cpuid_brand_get(cpu_brand, sizeof(cpu_brand)); + if (strstr(cpu_brand, "AMD")) + cpu_vendor = CPU_VENDOR_AMD; + if (strstr(cpu_brand, "Intel")) + cpu_vendor = CPU_VENDOR_INTEL; + + switch (cpu_vendor) { + case CPU_VENDOR_AMD: + type = AMD_MCE_MEM; + break; + case CPU_VENDOR_INTEL: + type = INTEL_MCE_SRAO_MEM; + break; + case CPU_VENDOR_UNKNOWN: + default: + Lprintf("Unknown cpu vendor 
on this machine\n"); + exit(EXIT_FAILURE); + } + init_msr_inj(); xc_handle = xc_interface_open(0, 0, 0); if ( !xc_handle ) { @@ -571,8 +787,8 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - while ( 1 ) { - c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index); + for (;;) { + c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, &opt_index); if ( c == -1 ) break; switch ( c ) { @@ -580,23 +796,26 @@ int main(int argc, char *argv[]) dump=1; break; case ''c'': - cpu_nr = strtol(optarg, &optarg, 10); + cpu_nr = strtoul(optarg, &optarg, 0); if ( strlen(optarg) != 0 ) err(xc_handle, "Please input a digit parameter for CPU\n"); break; case ''d'': - domid = strtol(optarg, &optarg, 10); + domid = strtoul(optarg, &optarg, 0); if ( strlen(optarg) != 0 ) err(xc_handle, "Please input a digit parameter for domain\n"); break; case ''p'': - gaddr = strtol(optarg, &optarg, 0); + gaddr = strtoul(optarg, &optarg, 0); if ( strlen(optarg) != 0 ) err(xc_handle, "Please input correct page address\n"); break; case ''t'': type = strtol(optarg, NULL, 0); break; + case ''e'': + opt_exception = 1; + break; case ''h'': default: help(); @@ -627,19 +846,36 @@ int main(int argc, char *argv[]) goto out; } - switch ( type ) - { - case MCE_SRAO_MEM: - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); - break; - case MCE_SRAO_LLC: - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); - break; - case CMCI_UCNA_LLC: - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + switch ( cpu_vendor ) { + case CPU_VENDOR_INTEL: + switch ( type ) { + case INTEL_MCE_SRAO_MEM: + intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); + break; + case INTEL_MCE_SRAO_LLC: + intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); + break; + case INTEL_CMCI_UCNA_LLC: + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + break; + default: + err(xc_handle, "Unsupported error type\n"); + break; + } break; - default: - err(xc_handle, "Unsupported error type\n"); + + case CPU_VENDOR_AMD: + switch ( type ) 
{ + case AMD_MCE_MEM: + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); + break; + case AMD_MCE_L3: + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); + break; + default: + err(xc_handle, "Unsupported error type\n"); + break; + } break; } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
>>> On 30.05.13 at 16:33, "Egger, Christoph" <chegger@amazon.de> wrote: > a8206654c64f:patches chegger$ cat xen_mceinj.diff > commit a277555e158c87aed34196f72eba0a4cf8f0fb38 > Author: Christoph Egger <chegger@amazon.de> > Date: Wed Feb 27 14:52:19 2013 +0000 > > xen-mceinj: Support AMD. Add -e option. > > Add support for AMD. > Add -e option to raise an exception. > > Signed-off-by: Christoph Egger <chegger@amazon.de> Apart from needing an ack from someone at Intel, this also is in need of quite a bit of coding style cleanup - the utility so far is, with a few exceptions, written in hypervisor style, so your additions should follow suit. > @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle) > return 0; > } > > -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) > +static int intel_inject_cmci(xc_interface *xc_handle) > { > struct xen_mc mc; > int nr_cpus; I didn't look too closely, but at least this one is still an unrelated change that's also not mentioned in the description (i.e. as being cleanup that's being done as you go). Jan
On Fri, 2013-05-31 at 09:30 +0100, Jan Beulich wrote: > >>> On 30.05.13 at 16:33, "Egger, Christoph" <chegger@amazon.de> wrote: > > a8206654c64f:patches chegger$ cat xen_mceinj.diff > > commit a277555e158c87aed34196f72eba0a4cf8f0fb38 > > Author: Christoph Egger <chegger@amazon.de> > > Date: Wed Feb 27 14:52:19 2013 +0000 > > > > xen-mceinj: Support AMD. Add -e option. > > > > Add support for AMD. > > Add -e option to raise an exception. > > > > Signed-off-by: Christoph Egger <chegger@amazon.de> > > Apart from needing an ack from someone at Intel, I'm not sure this needs to be all that strictly enforced for what is AIUI basically a devtest tool. Having the ACK would be better than not having it, but this patch has IIRC been posted several times over a 6 month period without comment from Intel and there's no reason to hold it up indefinitely waiting for them IMHO. I'm not sure if Intel were CC'd prior to this particular posting -- if not then we should give them a chance to respond but otherwise I think we should take this patch once your other comments are addressed. Ian.
Seems it cannot be applied to the latest tree, so please rebase (I agree it's our fault for not responding in time ... sorry). Some comments below. Thanks, Jinsong Egger, Christoph wrote: > a8206654c64f:patches chegger$ cat xen_mceinj.diff > commit a277555e158c87aed34196f72eba0a4cf8f0fb38 > Author: Christoph Egger <chegger@amazon.de> > Date: Wed Feb 27 14:52:19 2013 +0000 > > xen-mceinj: Support AMD. Add -e option. > > Add support for AMD. It's pretty OK for this purpose, but ... > Add -e option to raise an exception. ... why is the -e option w/ the opt_exception variable needed? AFAICT, it works fine w/o -e. Please add more comments giving a strong reason, especially since it forces the user to change the command line. > > Signed-off-by: Christoph Egger <chegger@amazon.de> > > diff --git a/tools/tests/mce-test/tools/xen-mceinj.c > b/tools/tests/mce-test/tools/xen-mceinj.c > index e3e62f7..7400a02 100644 > --- a/tools/tests/mce-test/tools/xen-mceinj.c > +++ b/tools/tests/mce-test/tools/xen-mceinj.c > @@ -1,6 +1,8 @@ > /* > * xen-mceinj.c: utilities to inject fake MCE for x86. > * Copyright (c) 2010, Intel Corporation. > + * Copyright (c) 2012, AMD Cooperation Inc. > + * Copyright (c) 2013, Amazon.com, Inc. or its affiliates. > * > * This program is free software; you can redistribute it and/or > modify it > * under the terms and conditions of the GNU General Public License, > @@ -18,6 +20,7 @@ > * Authors: Yunhong Jiang <yunhong.jiang@intel.com> > * Haicheng Li <haicheng.li@intel.com> > * Xudong Hao <xudong.hao@intel.com> > + * Christoph Egger <chegger@amazon.de> > */ > > > @@ -44,11 +47,14 @@ > #define MCi_type_STATUS 0x1 > #define MCi_type_ADDR 0x2 > #define MCi_type_MISC 0x3 > -#define MCi_type_CTL2 0x4 > +#define MC4_type_MISC1 0x4 > +#define MC4_type_MISC2 0x5 > +#define MC4_type_MISC3 0x6 > +#define MCi_type_CTL2 0x7 Why change the original sequence?
I didn''t see the necessity of doing so --> please add new types behind old ones so that we don''t need test old logic again.> > #define INVALID_MSR ~0UL > > -/* Intel MSRs */ > +/* X86 machine check MSRs */ > #define MSR_IA32_MCG_CAP 0x00000179 > #define MSR_IA32_MCG_STATUS 0x0000017a > #define MSR_IA32_MCG_CTL 0x0000017b > @@ -56,35 +62,63 @@ > #define MSR_IA32_MC0_STATUS 0x00000401 > #define MSR_IA32_MC0_ADDR 0x00000402 > #define MSR_IA32_MC0_MISC 0x00000403 > + > +/* Intel MSRs */ > #define MSR_IA32_MC0_CTL2 0x00000280 > > -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ > +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE > */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 > #define MCE_SRAO_LLC_BANK 0x7 > #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL > #define MCi_MISC_SRAO_LLC_VAL 0x86UL > > -/* Memory Patrol Scrub SRAO MCE */ > +/* Intel: Memory Patrol Scrub SRAO MCE */ > #define MCG_STATUS_SRAO_MEM_VAL 0x5 > #define MCE_SRAO_MEM_BANK 0x8 > #define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL > #define MCi_MISC_SRAO_MEM_VAL 0x86UL > > -/* LLC EWB UCNA Error */ > +/* Intel: LLC EWB UCNA Error */ > #define MCG_STATUS_UCNA_LLC_VAL 0x0 > #define CMCI_UCNA_LLC_BANK 0x9 > #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL > #define MCi_MISC_UCNA_LLC_VAL 0x86UL > > -/* Error Types */ > -#define MCE_SRAO_MEM 0x0 > -#define MCE_SRAO_LLC 0x1 > -#define CMCI_UCNA_LLC 0x2 > +/* Intel: Error Types */ > +#define INTEL_MCE_SRAO_MEM 0x0 > +#define INTEL_MCE_SRAO_LLC 0x1 > +#define INTEL_CMCI_UCNA_LLC 0x2 > + > +/* AMD: Memory Error */ > +#define MCG_STATUS_MEM_VAL 0x5 > +#define MCE_MEM_BANK 0x4 > +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL > +//#define MCi_STATUS_MEM_VAL 0xb600000000000100ULHmm, drop this test code please.> +#define MCi_MISC_MEM_VAL 0x0 > + > +/* AMD: L3 Cache Error */ > +#define MCG_STATUS_L3_VAL 0x5 > +#define MCE_L3_BANK 0x4 > +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL > +#define MC4_MISC0_VAL 0x0 > +#define 
MC4_MISC1_VAL 0x0 > +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL > + > +/* AMD: Error Types */ > +#define AMD_MCE_MEM 0x0 /* memory error */ > +#define AMD_MCE_L3 0x1 /* l3 cache */ > > #define LOGFILE stdout > > -int dump; > -struct xen_mc_msrinject msr_inj; > +static int dump; > +static int opt_exception; > +static struct xen_mc_msrinject msr_inj; > + > +#define CPU_VENDOR_UNKNOWN -1 > +#define CPU_VENDOR_AMD 0 > +#define CPU_VENDOR_INTEL 1 > +static int cpu_vendor; > + > > static void Lprintf(const char *fmt, ...) > { > @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle) > return 0; > } > > -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) > +static int intel_inject_cmci(xc_interface *xc_handle) > { > struct xen_mc mc; > int nr_cpus; > @@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type) > case MCi_type_MISC: > addr = MSR_IA32_MC0_CTL + (bank * 4) + type; > break; > + case MC4_type_MISC1: > + addr = 0xc0000408; > + break; > + case MC4_type_MISC2: > + addr = 0xc0000409; > + break; > + case MC4_type_MISC3: > + addr = 0xc000040a; > + break; > case MCi_type_CTL2: > addr = MSR_IA32_MC0_CTL2 + bank; > break; > @@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface > *xc_handle, } > > static int inject_mci_misc(xc_interface *xc_handle, > - uint32_t cpu_nr, > - uint64_t bank, > - uint64_t val) > + uint32_t cpu_nr, uint32_t misctype, > + uint64_t bank, uint64_t val) > { > return add_msr_bank_intpose(xc_handle, cpu_nr, > MC_MSRINJ_F_INTERPOSE, - > MCi_type_MISC, bank, val); + > MCi_type_MISC + misctype, bank, val); } > > static int inject_mci_addr(xc_interface *xc_handle, > @@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface > *xc_handle, MCi_type_ADDR, bank, > val); } > > -static int inject_llc_srao(xc_interface *xc_handle, > - uint32_t cpu_nr, > - uint32_t domain, > - uint64_t gaddr) > +static int intel_inject_llc_srao(xc_interface *xc_handle, > + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) > { > 
uint64_t gpfn, mfn, haddr; > int ret = 0; > @@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface > *xc_handle, if ( ret ) > err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); > > - ret = inject_mci_misc(xc_handle, cpu_nr, > + ret = inject_mci_misc(xc_handle, cpu_nr, 0, > MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL); > if ( ret ) > err(xc_handle, "Failed to inject MCi_MISC MSR\n"); > @@ -407,17 +447,18 @@ static int inject_llc_srao(xc_interface > *xc_handle, ret = flush_msr_inj(xc_handle); > if ( ret ) > err(xc_handle, "Failed to inject MSR\n"); > - ret = inject_mce(xc_handle, cpu_nr); > - if ( ret ) > - err(xc_handle, "Failed to inject MCE error\n"); > + > + if (opt_exception) { > + ret = inject_mce(xc_handle, cpu_nr); > + if ( ret ) > + err(xc_handle, "Failed to inject MCE error\n"); > + } > > return 0; > } > > -static int inject_mem_srao(xc_interface *xc_handle, > - uint32_t cpu_nr, > - uint32_t domain, > - uint64_t gaddr) > +static int intel_inject_mem_srao(xc_interface *xc_handle, > + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) > { > uint64_t gpfn, mfn, haddr; > int ret = 0; > @@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface > *xc_handle, if ( ret ) > err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); > > - ret = inject_mci_misc(xc_handle, cpu_nr, > + ret = inject_mci_misc(xc_handle, cpu_nr, 0, > MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL); > if ( ret ) > err(xc_handle, "Failed to inject MCi_MISC MSR\n"); > @@ -448,17 +489,18 @@ static int inject_mem_srao(xc_interface > *xc_handle, ret = flush_msr_inj(xc_handle); > if ( ret ) > err(xc_handle, "Failed to inject MSR\n"); > - ret = inject_mce(xc_handle, cpu_nr); > - if ( ret ) > - err(xc_handle, "Failed to inject MCE error\n"); > + > + if (opt_exception) { > + ret = inject_mce(xc_handle, cpu_nr); > + if ( ret ) > + err(xc_handle, "Failed to inject MCE error\n"); > + } > > return 0; > } > > -static int inject_llc_ucna(xc_interface *xc_handle, > - uint32_t cpu_nr, > - uint32_t domain, > - 
uint64_t gaddr) > +static int intel_inject_llc_ucna(xc_interface *xc_handle, > + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) > { > uint64_t gpfn, mfn, haddr; > int ret = 0; > @@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface > *xc_handle, if ( ret ) > err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); > > - ret = inject_mci_misc(xc_handle, cpu_nr, > + ret = inject_mci_misc(xc_handle, cpu_nr, 0, > CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL); > if ( ret ) > err(xc_handle, "Failed to inject MCi_MISC MSR\n"); > @@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface > *xc_handle, ret = flush_msr_inj(xc_handle); > if ( ret ) > err(xc_handle, "Failed to inject MSR\n"); > - ret = inject_cmci(xc_handle, cpu_nr); > + ret = intel_inject_cmci(xc_handle); > if ( ret ) > err(xc_handle, "Failed to inject MCE error\n"); > > return 0; > } > > +static int amd_inject_mem(xc_interface *xc_handle, > + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) > +{ > + uint64_t gpfn, mfn, haddr; > + int ret = 0; > + > + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); > + > + ret = inject_mci_status(xc_handle, cpu_nr, > + MCE_MEM_BANK, MCi_STATUS_MEM_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); > + > + ret = inject_mci_misc(xc_handle, cpu_nr, 0, > + MCE_MEM_BANK, MCi_MISC_MEM_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MCi_MISC MSR\n"); > + > + gpfn = gaddr >> PAGE_SHIFT; > + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); > + if (!mfn_valid(mfn)) > + err(xc_handle, "The MFN is not valid\n"); > + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); > + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr); > + if ( ret ) > + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); > + > + ret = flush_msr_inj(xc_handle); > + if ( ret ) > + err(xc_handle, "Failed to inject MSR\n"); > + > + if (opt_exception) { > + ret = 
inject_mce(xc_handle, cpu_nr); > + if ( ret ) > + err(xc_handle, "Failed to inject MCE error\n"); > + } > + > + return 0; > +} > + > +static int amd_inject_l3(xc_interface *xc_handle, > + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) > +{ > + uint64_t gpfn, mfn, haddr; > + int ret = 0; > + > + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); > + > + ret = inject_mci_status(xc_handle, cpu_nr, > + MCE_L3_BANK, MCi_STATUS_L3_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); > + > + ret = inject_mci_misc(xc_handle, cpu_nr, 0, > + MCE_L3_BANK, MC4_MISC0_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); > + > + ret = inject_mci_misc(xc_handle, cpu_nr, 1, > + MCE_L3_BANK, MC4_MISC1_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); > + > + ret = inject_mci_misc(xc_handle, cpu_nr, 2, > + MCE_L3_BANK, MC4_MISC2_L3_VAL); > + if ( ret ) > + err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); > + > + gpfn = gaddr >> PAGE_SHIFT; > + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); > + if (!mfn_valid(mfn)) > + err(xc_handle, "The MFN is not valid\n"); > + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); > + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr); > + if ( ret ) > + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); > + > + ret = flush_msr_inj(xc_handle); > + if ( ret ) > + err(xc_handle, "Failed to inject MSR\n"); > + > + if (opt_exception) { > + ret = inject_mce(xc_handle, cpu_nr); > + if ( ret ) > + err(xc_handle, "Failed to inject MCE error\n"); > + } > + > + return 0; > +} > + > + > static long xs_get_dom_mem(int domid) > { > char path[128]; > @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) > if (!xs) > return -1; > > - sprintf(path, "/local/domain/%d/memory/target", domid); > + snprintf(path, sizeof(path), "/local/domain/%d/memory/target", > domid); memstr = 
xs_read(xs, XBT_NULL, path, &plen); > xs_daemon_close(xs); > > @@ -540,30 +677,109 @@ static void help(void) > " -D, --dump dump addr info without error > injection\n" > " -c, --cpu=CPU_ID target CPU\n" > " -d, --domain=DomID target domain, the default is Xen > itself\n" > + " -e raise MCE exception\n" > " -h, --help print this page\n" > " -p, --phyaddr physical address\n" > - " -t, --type=error error type\n" > - " 0 : MCE_SRAO_MEM\n" > - " 1 : MCE_SRAO_LLC\n" > - " 2 : CMCI_UCNA_LLC\n" > - "\n" > ); > + > + if (cpu_vendor == CPU_VENDOR_INTEL) { > + printf( > + " -t, --type=error error type\n" > + " 0x0 : SRAO MEM\n" > + " 0x1 : SRAO LLC\n" > + " 0x2 : CMCI UCNA LLC\n"); > + } > + if (cpu_vendor == CPU_VENDOR_AMD) { > + printf( > + " -t, --type=error error type\n" > + " 0x0: DRAM error\n" > + " 0x1: L3 cache error\n"); > + } > + printf("\n"); > +} > + > +static void cpuid(const unsigned int *input, unsigned int regs[4]) > +{ > + unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : > input[1]; > +#ifdef __i386__ > + /* Use the stack to avoid reg constraint failures with some gcc > flags */ > + asm ( > + "push %%ebx; push %%edx\n\t" > + "cpuid\n\t" > + "mov %%ebx,4(%4)\n\t" > + "mov %%edx,12(%4)\n\t" > + "pop %%edx; pop %%ebx\n\t" > + : "=a" (regs[0]), "=c" (regs[2]) > + : "0" (input[0]), "1" (count), "S" (regs) > + : "memory" ); > +#else > + asm ( > + "cpuid" > + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" > (regs[3]) + : "0" (input[0]), "2" (count) ); > +#endif > +} > + > +/* Get the manufacturer brand name of the host processor. 
*/ > +static void cpuid_brand_get(char *str, size_t len) > +{ > + unsigned int input[2] = { 0, 0 }; > + union { > + unsigned int regs[4]; > + struct { > + char eax[4]; > + char ebx[4]; > + char ecx[4]; > + char edx[4]; > + } str_regs; > + } cpu_branding; > + > + cpuid(input, cpu_branding.regs); > + > + snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c", > + cpu_branding.str_regs.ebx[0], cpu_branding.str_regs.ebx[1], > + cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3], > + cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1], > + cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3], > + cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1], > + cpu_branding.str_regs.ecx[2], cpu_branding.str_regs.ecx[3]); > } > > int main(int argc, char *argv[]) > { > - int type = MCE_SRAO_MEM; > + int type; > int c, opt_index; > uint32_t domid; > xc_interface *xc_handle; > - int cpu_nr; > - int64_t gaddr, gpfn, mfn, haddr, max_gpa; > + unsigned int cpu_nr; > + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; > + char cpu_brand[13]; > > /* Default Value */ > domid = DOMID_XEN; > gaddr = 0x180020; > cpu_nr = 0; > > + cpu_vendor = CPU_VENDOR_UNKNOWN; > + cpuid_brand_get(cpu_brand, sizeof(cpu_brand)); > + if (strstr(cpu_brand, "AMD")) > + cpu_vendor = CPU_VENDOR_AMD; > + if (strstr(cpu_brand, "Intel")) > + cpu_vendor = CPU_VENDOR_INTEL; > + > + switch (cpu_vendor) { > + case CPU_VENDOR_AMD: > + type = AMD_MCE_MEM; > + break; > + case CPU_VENDOR_INTEL: > + type = INTEL_MCE_SRAO_MEM; > + break; > + case CPU_VENDOR_UNKNOWN: > + default: > + Lprintf("Unknown cpu vendor on this machine\n"); > + exit(EXIT_FAILURE); > + } > + > init_msr_inj(); > xc_handle = xc_interface_open(0, 0, 0); > if ( !xc_handle ) { > @@ -571,8 +787,8 @@ int main(int argc, char *argv[]) > exit(EXIT_FAILURE); > } > > - while ( 1 ) { > - c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index); > + for (;;) { > + c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, > &opt_index); if ( c == -1 ) > break; 
> switch ( c ) { > @@ -580,23 +796,26 @@ int main(int argc, char *argv[]) > dump=1; > break; > case ''c'': > - cpu_nr = strtol(optarg, &optarg, 10); > + cpu_nr = strtoul(optarg, &optarg, 0); > if ( strlen(optarg) != 0 ) > err(xc_handle, "Please input a digit parameter for > CPU\n"); break; > case ''d'': > - domid = strtol(optarg, &optarg, 10); > + domid = strtoul(optarg, &optarg, 0); > if ( strlen(optarg) != 0 ) > err(xc_handle, "Please input a digit parameter for > domain\n"); > break; > case ''p'': > - gaddr = strtol(optarg, &optarg, 0); > + gaddr = strtoul(optarg, &optarg, 0); > if ( strlen(optarg) != 0 ) > err(xc_handle, "Please input correct page > address\n"); break; > case ''t'': > type = strtol(optarg, NULL, 0); > break; > + case ''e'': > + opt_exception = 1; > + break; > case ''h'': > default: > help(); > @@ -627,19 +846,36 @@ int main(int argc, char *argv[]) > goto out; > } > > - switch ( type ) > - { > - case MCE_SRAO_MEM: > - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); > - break; > - case MCE_SRAO_LLC: > - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); > - break; > - case CMCI_UCNA_LLC: > - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); > + switch ( cpu_vendor ) { > + case CPU_VENDOR_INTEL: > + switch ( type ) { > + case INTEL_MCE_SRAO_MEM: > + intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); > + break; > + case INTEL_MCE_SRAO_LLC: > + intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); > + break; > + case INTEL_CMCI_UCNA_LLC: > + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); > + break; > + default: > + err(xc_handle, "Unsupported error type\n"); > + break; > + } > break; > - default: > - err(xc_handle, "Unsupported error type\n"); > + > + case CPU_VENDOR_AMD: > + switch ( type ) { > + case AMD_MCE_MEM: > + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); > + break; > + case AMD_MCE_L3: > + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); > + break; > + default: > + err(xc_handle, "Unsupported error type\n"); > + break; > 
+ } > break; > }
Ian Campbell wrote: > On Fri, 2013-05-31 at 09:30 +0100, Jan Beulich wrote: >>>>> On 30.05.13 at 16:33, "Egger, Christoph" <chegger@amazon.de> >>>>> wrote: >>> a8206654c64f:patches chegger$ cat xen_mceinj.diff >>> commit a277555e158c87aed34196f72eba0a4cf8f0fb38 >>> Author: Christoph Egger <chegger@amazon.de> >>> Date: Wed Feb 27 14:52:19 2013 +0000 >>> >>> xen-mceinj: Support AMD. Add -e option. >>> >>> Add support for AMD. >>> Add -e option to raise an exception. >>> >>> Signed-off-by: Christoph Egger <chegger@amazon.de> >> >> Apart from needing an ack from someone at Intel, > > I'm not sure this needs to be all that strictly enforced for what is > AIUI basically a devtest tool. Having the ACK would be better than not > having it, but this patch has IIRC been posted several times over a 6 > month period without comment from Intel and there's no reason to hold > it up indefinitely waiting for them IMHO. > > I'm not sure if Intel were CCd prior to this particular posting -- if > not then we should give them a chance to respond but otherwise I think > we should take this patch once your other comments are addressed. > > Ian. Sorry, my fault. I have sent out comments. Thanks, Jinsong
>>> On 31.05.13 at 13:31, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Egger, Christoph wrote: >> @@ -44,11 +47,14 @@ >> #define MCi_type_STATUS 0x1 >> #define MCi_type_ADDR 0x2 >> #define MCi_type_MISC 0x3 >> -#define MCi_type_CTL2 0x4 >> +#define MC4_type_MISC1 0x4 >> +#define MC4_type_MISC2 0x5 >> +#define MC4_type_MISC3 0x6 >> +#define MCi_type_CTL2 0x7 > > Why change original sequence? I didn't see the necessity of doing so --> please > add new types behind old ones so that we don't need test old logic again. Actually I think I'm with Christoph here - keeping the MISC ones together makes more sense than having an unmanageable mixture of things in arbitrary order. If the original code was written halfway properly, then I also don't see any re-testing need - there shouldn't be any dependencies on the particular values these symbols have (after all that's what they were - supposedly - introduced for). Jan
On 31.05.13 13:31, Liu, Jinsong wrote:> Seems it cannot patch to latest tree, so please rebase (I agree it''s our fault not response in time ... sorry). > Some comments below. > > Thanks, > Jinsong > > Egger, Christoph wrote: >> a8206654c64f:patches chegger$ cat xen_mceinj.diff >> commit a277555e158c87aed34196f72eba0a4cf8f0fb38 >> Author: Christoph Egger <chegger@amazon.de> >> Date: Wed Feb 27 14:52:19 2013 +0000 >> >> xen-mceinj: Support AMD. Add -e option. >> >> Add support for AMD. > > It''s pretty OK for this purpose, but ... > >> Add -e option to raise an exception. > > ... why need -e option w/ opt_exception variable? > AFAICT, it works fine w/o -e. Please add more comments > for strong reason, especially it need force user to change command line.Without -e this allows you to test the polling handler and -e this allows you to test the exception handler.>> >> Signed-off-by: Christoph Egger <chegger@amazon.de> >> >> diff --git a/tools/tests/mce-test/tools/xen-mceinj.c >> b/tools/tests/mce-test/tools/xen-mceinj.c >> index e3e62f7..7400a02 100644 >> --- a/tools/tests/mce-test/tools/xen-mceinj.c >> +++ b/tools/tests/mce-test/tools/xen-mceinj.c >> @@ -1,6 +1,8 @@ >> /* >> * xen-mceinj.c: utilities to inject fake MCE for x86. >> * Copyright (c) 2010, Intel Corporation. >> + * Copyright (c) 2012, AMD Cooperation Inc. >> + * Copyright (c) 2013, Amazon.com, Inc. or its affiliates. 
>> * >> * This program is free software; you can redistribute it and/or >> modify it >> * under the terms and conditions of the GNU General Public License, >> @@ -18,6 +20,7 @@ >> * Authors: Yunhong Jiang <yunhong.jiang@intel.com> >> * Haicheng Li <haicheng.li@intel.com> >> * Xudong Hao <xudong.hao@intel.com> >> + * Christoph Egger <chegger@amazon.de> >> */ >> >> >> @@ -44,11 +47,14 @@ >> #define MCi_type_STATUS 0x1 >> #define MCi_type_ADDR 0x2 >> #define MCi_type_MISC 0x3 >> -#define MCi_type_CTL2 0x4 >> +#define MC4_type_MISC1 0x4 >> +#define MC4_type_MISC2 0x5 >> +#define MC4_type_MISC3 0x6 >> +#define MCi_type_CTL2 0x7 > > Why change original sequence? I didn''t see the necessity of doing so --> please add new types behind old ones so that we don''t need test old logic again.See Jan''s comment.>> >> #define INVALID_MSR ~0UL >> >> -/* Intel MSRs */ >> +/* X86 machine check MSRs */ >> #define MSR_IA32_MCG_CAP 0x00000179 >> #define MSR_IA32_MCG_STATUS 0x0000017a >> #define MSR_IA32_MCG_CTL 0x0000017b >> @@ -56,35 +62,63 @@ >> #define MSR_IA32_MC0_STATUS 0x00000401 >> #define MSR_IA32_MC0_ADDR 0x00000402 >> #define MSR_IA32_MC0_MISC 0x00000403 >> + >> +/* Intel MSRs */ >> #define MSR_IA32_MC0_CTL2 0x00000280 >> >> -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ >> +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE >> */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 >> #define MCE_SRAO_LLC_BANK 0x7 >> #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL >> #define MCi_MISC_SRAO_LLC_VAL 0x86UL >> >> -/* Memory Patrol Scrub SRAO MCE */ >> +/* Intel: Memory Patrol Scrub SRAO MCE */ >> #define MCG_STATUS_SRAO_MEM_VAL 0x5 >> #define MCE_SRAO_MEM_BANK 0x8 >> #define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL >> #define MCi_MISC_SRAO_MEM_VAL 0x86UL >> >> -/* LLC EWB UCNA Error */ >> +/* Intel: LLC EWB UCNA Error */ >> #define MCG_STATUS_UCNA_LLC_VAL 0x0 >> #define CMCI_UCNA_LLC_BANK 0x9 >> #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL >> 
#define MCi_MISC_UCNA_LLC_VAL 0x86UL >> >> -/* Error Types */ >> -#define MCE_SRAO_MEM 0x0 >> -#define MCE_SRAO_LLC 0x1 >> -#define CMCI_UCNA_LLC 0x2 >> +/* Intel: Error Types */ >> +#define INTEL_MCE_SRAO_MEM 0x0 >> +#define INTEL_MCE_SRAO_LLC 0x1 >> +#define INTEL_CMCI_UCNA_LLC 0x2 >> + >> +/* AMD: Memory Error */ >> +#define MCG_STATUS_MEM_VAL 0x5 >> +#define MCE_MEM_BANK 0x4 >> +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL >> +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL > > Hmm, drop this test code please.What do you not like? The // ? Christoph> >> +#define MCi_MISC_MEM_VAL 0x0 >> + >> +/* AMD: L3 Cache Error */ >> +#define MCG_STATUS_L3_VAL 0x5 >> +#define MCE_L3_BANK 0x4 >> +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL >> +#define MC4_MISC0_VAL 0x0 >> +#define MC4_MISC1_VAL 0x0 >> +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL >> + >> +/* AMD: Error Types */ >> +#define AMD_MCE_MEM 0x0 /* memory error */ >> +#define AMD_MCE_L3 0x1 /* l3 cache */ >> >> #define LOGFILE stdout >> >> -int dump; >> -struct xen_mc_msrinject msr_inj; >> +static int dump; >> +static int opt_exception; >> +static struct xen_mc_msrinject msr_inj; >> + >> +#define CPU_VENDOR_UNKNOWN -1 >> +#define CPU_VENDOR_AMD 0 >> +#define CPU_VENDOR_INTEL 1 >> +static int cpu_vendor; >> + >> >> static void Lprintf(const char *fmt, ...) 
>> { >> @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle) >> return 0; >> } >> >> -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) >> +static int intel_inject_cmci(xc_interface *xc_handle) >> { >> struct xen_mc mc; >> int nr_cpus; >> @@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type) >> case MCi_type_MISC: >> addr = MSR_IA32_MC0_CTL + (bank * 4) + type; >> break; >> + case MC4_type_MISC1: >> + addr = 0xc0000408; >> + break; >> + case MC4_type_MISC2: >> + addr = 0xc0000409; >> + break; >> + case MC4_type_MISC3: >> + addr = 0xc000040a; >> + break; >> case MCi_type_CTL2: >> addr = MSR_IA32_MC0_CTL2 + bank; >> break; >> @@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface >> *xc_handle, } >> >> static int inject_mci_misc(xc_interface *xc_handle, >> - uint32_t cpu_nr, >> - uint64_t bank, >> - uint64_t val) >> + uint32_t cpu_nr, uint32_t misctype, >> + uint64_t bank, uint64_t val) >> { >> return add_msr_bank_intpose(xc_handle, cpu_nr, >> MC_MSRINJ_F_INTERPOSE, - >> MCi_type_MISC, bank, val); + >> MCi_type_MISC + misctype, bank, val); } >> >> static int inject_mci_addr(xc_interface *xc_handle, >> @@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface >> *xc_handle, MCi_type_ADDR, bank, >> val); } >> >> -static int inject_llc_srao(xc_interface *xc_handle, >> - uint32_t cpu_nr, >> - uint32_t domain, >> - uint64_t gaddr) >> +static int intel_inject_llc_srao(xc_interface *xc_handle, >> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) >> { >> uint64_t gpfn, mfn, haddr; >> int ret = 0; >> @@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface >> *xc_handle, if ( ret ) >> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); >> >> - ret = inject_mci_misc(xc_handle, cpu_nr, >> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >> MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL); >> if ( ret ) >> err(xc_handle, "Failed to inject MCi_MISC MSR\n"); >> @@ -407,17 +447,18 @@ static int inject_llc_srao(xc_interface >> 
*xc_handle, ret = flush_msr_inj(xc_handle); >> if ( ret ) >> err(xc_handle, "Failed to inject MSR\n"); >> - ret = inject_mce(xc_handle, cpu_nr); >> - if ( ret ) >> - err(xc_handle, "Failed to inject MCE error\n"); >> + >> + if (opt_exception) { >> + ret = inject_mce(xc_handle, cpu_nr); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCE error\n"); >> + } >> >> return 0; >> } >> >> -static int inject_mem_srao(xc_interface *xc_handle, >> - uint32_t cpu_nr, >> - uint32_t domain, >> - uint64_t gaddr) >> +static int intel_inject_mem_srao(xc_interface *xc_handle, >> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) >> { >> uint64_t gpfn, mfn, haddr; >> int ret = 0; >> @@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface >> *xc_handle, if ( ret ) >> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); >> >> - ret = inject_mci_misc(xc_handle, cpu_nr, >> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >> MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL); >> if ( ret ) >> err(xc_handle, "Failed to inject MCi_MISC MSR\n"); >> @@ -448,17 +489,18 @@ static int inject_mem_srao(xc_interface >> *xc_handle, ret = flush_msr_inj(xc_handle); >> if ( ret ) >> err(xc_handle, "Failed to inject MSR\n"); >> - ret = inject_mce(xc_handle, cpu_nr); >> - if ( ret ) >> - err(xc_handle, "Failed to inject MCE error\n"); >> + >> + if (opt_exception) { >> + ret = inject_mce(xc_handle, cpu_nr); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCE error\n"); >> + } >> >> return 0; >> } >> >> -static int inject_llc_ucna(xc_interface *xc_handle, >> - uint32_t cpu_nr, >> - uint32_t domain, >> - uint64_t gaddr) >> +static int intel_inject_llc_ucna(xc_interface *xc_handle, >> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) >> { >> uint64_t gpfn, mfn, haddr; >> int ret = 0; >> @@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface >> *xc_handle, if ( ret ) >> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); >> >> - ret = inject_mci_misc(xc_handle, cpu_nr, >> + ret = 
inject_mci_misc(xc_handle, cpu_nr, 0, >> CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL); >> if ( ret ) >> err(xc_handle, "Failed to inject MCi_MISC MSR\n"); >> @@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface >> *xc_handle, ret = flush_msr_inj(xc_handle); >> if ( ret ) >> err(xc_handle, "Failed to inject MSR\n"); >> - ret = inject_cmci(xc_handle, cpu_nr); >> + ret = intel_inject_cmci(xc_handle); >> if ( ret ) >> err(xc_handle, "Failed to inject MCE error\n"); >> >> return 0; >> } >> >> +static int amd_inject_mem(xc_interface *xc_handle, >> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) >> +{ >> + uint64_t gpfn, mfn, haddr; >> + int ret = 0; >> + >> + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); >> + >> + ret = inject_mci_status(xc_handle, cpu_nr, >> + MCE_MEM_BANK, MCi_STATUS_MEM_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); >> + >> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >> + MCE_MEM_BANK, MCi_MISC_MEM_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCi_MISC MSR\n"); >> + >> + gpfn = gaddr >> PAGE_SHIFT; >> + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); >> + if (!mfn_valid(mfn)) >> + err(xc_handle, "The MFN is not valid\n"); >> + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); >> + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); >> + >> + ret = flush_msr_inj(xc_handle); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MSR\n"); >> + >> + if (opt_exception) { >> + ret = inject_mce(xc_handle, cpu_nr); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCE error\n"); >> + } >> + >> + return 0; >> +} >> + >> +static int amd_inject_l3(xc_interface *xc_handle, >> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) >> +{ >> + uint64_t gpfn, mfn, haddr; >> + int ret = 0; >> + >> + ret = 
inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCG_STATUS MSR\n"); >> + >> + ret = inject_mci_status(xc_handle, cpu_nr, >> + MCE_L3_BANK, MCi_STATUS_L3_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); >> + >> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >> + MCE_L3_BANK, MC4_MISC0_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); >> + >> + ret = inject_mci_misc(xc_handle, cpu_nr, 1, >> + MCE_L3_BANK, MC4_MISC1_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); >> + >> + ret = inject_mci_misc(xc_handle, cpu_nr, 2, >> + MCE_L3_BANK, MC4_MISC2_L3_VAL); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); >> + >> + gpfn = gaddr >> PAGE_SHIFT; >> + mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn); >> + if (!mfn_valid(mfn)) >> + err(xc_handle, "The MFN is not valid\n"); >> + haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1)); >> + ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCi_ADDR MSR\n"); >> + >> + ret = flush_msr_inj(xc_handle); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MSR\n"); >> + >> + if (opt_exception) { >> + ret = inject_mce(xc_handle, cpu_nr); >> + if ( ret ) >> + err(xc_handle, "Failed to inject MCE error\n"); >> + } >> + >> + return 0; >> +} >> + >> + >> static long xs_get_dom_mem(int domid) >> { >> char path[128]; >> @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) >> if (!xs) >> return -1; >> >> - sprintf(path, "/local/domain/%d/memory/target", domid); >> + snprintf(path, sizeof(path), "/local/domain/%d/memory/target", >> domid); memstr = xs_read(xs, XBT_NULL, path, &plen); >> xs_daemon_close(xs); >> >> @@ -540,30 +677,109 @@ static void help(void) >> " -D, --dump dump addr info without error >> injection\n" >> " -c, --cpu=CPU_ID target CPU\n" >> " -d, --domain=DomID target domain, the 
default is Xen >> itself\n" >> + " -e raise MCE exception\n" >> " -h, --help print this page\n" >> " -p, --phyaddr physical address\n" >> - " -t, --type=error error type\n" >> - " 0 : MCE_SRAO_MEM\n" >> - " 1 : MCE_SRAO_LLC\n" >> - " 2 : CMCI_UCNA_LLC\n" >> - "\n" >> ); >> + >> + if (cpu_vendor == CPU_VENDOR_INTEL) { >> + printf( >> + " -t, --type=error error type\n" >> + " 0x0 : SRAO MEM\n" >> + " 0x1 : SRAO LLC\n" >> + " 0x2 : CMCI UCNA LLC\n"); >> + } >> + if (cpu_vendor == CPU_VENDOR_AMD) { >> + printf( >> + " -t, --type=error error type\n" >> + " 0x0: DRAM error\n" >> + " 0x1: L3 cache error\n"); >> + } >> + printf("\n"); >> +} >> + >> +static void cpuid(const unsigned int *input, unsigned int regs[4]) >> +{ >> + unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : >> input[1]; >> +#ifdef __i386__ >> + /* Use the stack to avoid reg constraint failures with some gcc >> flags */ >> + asm ( >> + "push %%ebx; push %%edx\n\t" >> + "cpuid\n\t" >> + "mov %%ebx,4(%4)\n\t" >> + "mov %%edx,12(%4)\n\t" >> + "pop %%edx; pop %%ebx\n\t" >> + : "=a" (regs[0]), "=c" (regs[2]) >> + : "0" (input[0]), "1" (count), "S" (regs) >> + : "memory" ); >> +#else >> + asm ( >> + "cpuid" >> + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" >> (regs[3]) + : "0" (input[0]), "2" (count) ); >> +#endif >> +} >> + >> +/* Get the manufacturer brand name of the host processor. 
*/ >> +static void cpuid_brand_get(char *str, size_t len) >> +{ >> + unsigned int input[2] = { 0, 0 }; >> + union { >> + unsigned int regs[4]; >> + struct { >> + char eax[4]; >> + char ebx[4]; >> + char ecx[4]; >> + char edx[4]; >> + } str_regs; >> + } cpu_branding; >> + >> + cpuid(input, cpu_branding.regs); >> + >> + snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c", >> + cpu_branding.str_regs.ebx[0], cpu_branding.str_regs.ebx[1], >> + cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3], >> + cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1], >> + cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3], >> + cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1], >> + cpu_branding.str_regs.ecx[2], cpu_branding.str_regs.ecx[3]); >> } >> >> int main(int argc, char *argv[]) >> { >> - int type = MCE_SRAO_MEM; >> + int type; >> int c, opt_index; >> uint32_t domid; >> xc_interface *xc_handle; >> - int cpu_nr; >> - int64_t gaddr, gpfn, mfn, haddr, max_gpa; >> + unsigned int cpu_nr; >> + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; >> + char cpu_brand[13]; >> >> /* Default Value */ >> domid = DOMID_XEN; >> gaddr = 0x180020; >> cpu_nr = 0; >> >> + cpu_vendor = CPU_VENDOR_UNKNOWN; >> + cpuid_brand_get(cpu_brand, sizeof(cpu_brand)); >> + if (strstr(cpu_brand, "AMD")) >> + cpu_vendor = CPU_VENDOR_AMD; >> + if (strstr(cpu_brand, "Intel")) >> + cpu_vendor = CPU_VENDOR_INTEL; >> + >> + switch (cpu_vendor) { >> + case CPU_VENDOR_AMD: >> + type = AMD_MCE_MEM; >> + break; >> + case CPU_VENDOR_INTEL: >> + type = INTEL_MCE_SRAO_MEM; >> + break; >> + case CPU_VENDOR_UNKNOWN: >> + default: >> + Lprintf("Unknown cpu vendor on this machine\n"); >> + exit(EXIT_FAILURE); >> + } >> + >> init_msr_inj(); >> xc_handle = xc_interface_open(0, 0, 0); >> if ( !xc_handle ) { >> @@ -571,8 +787,8 @@ int main(int argc, char *argv[]) >> exit(EXIT_FAILURE); >> } >> >> - while ( 1 ) { >> - c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index); >> + for (;;) { >> + c = 
getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, >> &opt_index); if ( c == -1 ) >> break; >> switch ( c ) { >> @@ -580,23 +796,26 @@ int main(int argc, char *argv[]) >> dump=1; >> break; >> case ''c'': >> - cpu_nr = strtol(optarg, &optarg, 10); >> + cpu_nr = strtoul(optarg, &optarg, 0); >> if ( strlen(optarg) != 0 ) >> err(xc_handle, "Please input a digit parameter for >> CPU\n"); break; >> case ''d'': >> - domid = strtol(optarg, &optarg, 10); >> + domid = strtoul(optarg, &optarg, 0); >> if ( strlen(optarg) != 0 ) >> err(xc_handle, "Please input a digit parameter for >> domain\n"); >> break; >> case ''p'': >> - gaddr = strtol(optarg, &optarg, 0); >> + gaddr = strtoul(optarg, &optarg, 0); >> if ( strlen(optarg) != 0 ) >> err(xc_handle, "Please input correct page >> address\n"); break; >> case ''t'': >> type = strtol(optarg, NULL, 0); >> break; >> + case ''e'': >> + opt_exception = 1; >> + break; >> case ''h'': >> default: >> help(); >> @@ -627,19 +846,36 @@ int main(int argc, char *argv[]) >> goto out; >> } >> >> - switch ( type ) >> - { >> - case MCE_SRAO_MEM: >> - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); >> - break; >> - case MCE_SRAO_LLC: >> - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); >> - break; >> - case CMCI_UCNA_LLC: >> - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); >> + switch ( cpu_vendor ) { >> + case CPU_VENDOR_INTEL: >> + switch ( type ) { >> + case INTEL_MCE_SRAO_MEM: >> + intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); >> + break; >> + case INTEL_MCE_SRAO_LLC: >> + intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); >> + break; >> + case INTEL_CMCI_UCNA_LLC: >> + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); >> + break; >> + default: >> + err(xc_handle, "Unsupported error type\n"); >> + break; >> + } >> break; >> - default: >> - err(xc_handle, "Unsupported error type\n"); >> + >> + case CPU_VENDOR_AMD: >> + switch ( type ) { >> + case AMD_MCE_MEM: >> + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); >> + 
break; >> + case AMD_MCE_L3: >> + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); >> + break; >> + default: >> + err(xc_handle, "Unsupported error type\n"); >> + break; >> + } >> break; >> } >
Jan Beulich wrote: >>>> On 31.05.13 at 13:31, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Egger, Christoph wrote: >>> @@ -44,11 +47,14 @@ >>> #define MCi_type_STATUS 0x1 >>> #define MCi_type_ADDR 0x2 >>> #define MCi_type_MISC 0x3 >>> -#define MCi_type_CTL2 0x4 >>> +#define MC4_type_MISC1 0x4 >>> +#define MC4_type_MISC2 0x5 >>> +#define MC4_type_MISC3 0x6 >>> +#define MCi_type_CTL2 0x7 >> >> Why change original sequence? I didn't see the necessity of doing so >> --> please add new types behind old ones so that we don't need test >> old logic again. > > Actually I think I'm with Christoph here - keeping the MISC ones > together makes more sense than having an unmanageable mixture of > things in arbitrary order. > > If the original code was written halfway properly, then I also don't > see any re-testing need - there shouldn't be any dependencies on > the particular values these symbols have (after all that's what they > were - supposedly - introduced for). > > Jan OK, it makes sense to me. Thanks, Jinsong
>>> On 31.05.13 at 16:13, Christoph Egger <chegger@amazon.de> wrote: > On 31.05.13 13:31, Liu, Jinsong wrote: >>> +/* AMD: Memory Error */ >>> +#define MCG_STATUS_MEM_VAL 0x5 >>> +#define MCE_MEM_BANK 0x4 >>> +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL >>> +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL >> >> Hmm, drop this test code please. > > What do you not like? The // ? Here I agree with Jinsong - without a comment explaining what the commented-out line could be used for, it should be removed. And if it is to be kept, it should - together with the explanation - go into a proper C-style comment. Jan
Christoph Egger wrote:> On 31.05.13 13:31, Liu, Jinsong wrote: >> Seems it cannot patch to latest tree, so please rebase (I agree it''s >> our fault not response in time ... sorry). >> Some comments below. >> >> Thanks, >> Jinsong >> >> Egger, Christoph wrote: >>> a8206654c64f:patches chegger$ cat xen_mceinj.diff >>> commit a277555e158c87aed34196f72eba0a4cf8f0fb38 >>> Author: Christoph Egger <chegger@amazon.de> >>> Date: Wed Feb 27 14:52:19 2013 +0000 >>> >>> xen-mceinj: Support AMD. Add -e option. >>> >>> Add support for AMD. >> >> It''s pretty OK for this purpose, but ... >> >>> Add -e option to raise an exception. >> >> ... why need -e option w/ opt_exception variable? >> AFAICT, it works fine w/o -e. Please add more comments >> for strong reason, especially it need force user to change command >> line. > > Without -e this allows you to test the polling handler and -e this > allows you to test the exception handler.If for new polling purpose, isn''t it more reasonable to add another explicit option for polling? In this way we keep same style/interface with old one, defaultly test mce. What is more, please add description (especially the --help) for polling purpose -- I didn''t find it from code review or from description.> >>> >>> Signed-off-by: Christoph Egger <chegger@amazon.de> >>> >>> diff --git a/tools/tests/mce-test/tools/xen-mceinj.c >>> b/tools/tests/mce-test/tools/xen-mceinj.c >>> index e3e62f7..7400a02 100644 >>> --- a/tools/tests/mce-test/tools/xen-mceinj.c >>> +++ b/tools/tests/mce-test/tools/xen-mceinj.c >>> @@ -1,6 +1,8 @@ >>> /* >>> * xen-mceinj.c: utilities to inject fake MCE for x86. >>> * Copyright (c) 2010, Intel Corporation. >>> + * Copyright (c) 2012, AMD Cooperation Inc. >>> + * Copyright (c) 2013, Amazon.com, Inc. or its affiliates. 
* >>> * This program is free software; you can redistribute it and/or >>> modify it >>> * under the terms and conditions of the GNU General Public >>> License, @@ -18,6 +20,7 @@ >>> * Authors: Yunhong Jiang <yunhong.jiang@intel.com> >>> * Haicheng Li <haicheng.li@intel.com> >>> * Xudong Hao <xudong.hao@intel.com> >>> + * Christoph Egger <chegger@amazon.de> >>> */ >>> >>> >>> @@ -44,11 +47,14 @@ >>> #define MCi_type_STATUS 0x1 >>> #define MCi_type_ADDR 0x2 >>> #define MCi_type_MISC 0x3 >>> -#define MCi_type_CTL2 0x4 >>> +#define MC4_type_MISC1 0x4 >>> +#define MC4_type_MISC2 0x5 >>> +#define MC4_type_MISC3 0x6 >>> +#define MCi_type_CTL2 0x7 >> >> Why change original sequence? I didn''t see the necessity of doing so >> --> please add new types behind old ones so that we don''t need test >> old logic again. > > See Jan''s comment.Agree.> >>> >>> #define INVALID_MSR ~0UL >>> >>> -/* Intel MSRs */ >>> +/* X86 machine check MSRs */ >>> #define MSR_IA32_MCG_CAP 0x00000179 >>> #define MSR_IA32_MCG_STATUS 0x0000017a >>> #define MSR_IA32_MCG_CTL 0x0000017b >>> @@ -56,35 +62,63 @@ >>> #define MSR_IA32_MC0_STATUS 0x00000401 >>> #define MSR_IA32_MC0_ADDR 0x00000402 >>> #define MSR_IA32_MC0_MISC 0x00000403 >>> + >>> +/* Intel MSRs */ >>> #define MSR_IA32_MC0_CTL2 0x00000280 >>> >>> -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ >>> +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE >>> */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 >>> #define MCE_SRAO_LLC_BANK 0x7 >>> #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL >>> #define MCi_MISC_SRAO_LLC_VAL 0x86UL >>> >>> -/* Memory Patrol Scrub SRAO MCE */ >>> +/* Intel: Memory Patrol Scrub SRAO MCE */ >>> #define MCG_STATUS_SRAO_MEM_VAL 0x5 >>> #define MCE_SRAO_MEM_BANK 0x8 >>> #define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL >>> #define MCi_MISC_SRAO_MEM_VAL 0x86UL >>> >>> -/* LLC EWB UCNA Error */ >>> +/* Intel: LLC EWB UCNA Error */ >>> #define MCG_STATUS_UCNA_LLC_VAL 0x0 >>> #define 
CMCI_UCNA_LLC_BANK 0x9 >>> #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL >>> #define MCi_MISC_UCNA_LLC_VAL 0x86UL >>> >>> -/* Error Types */ >>> -#define MCE_SRAO_MEM 0x0 >>> -#define MCE_SRAO_LLC 0x1 >>> -#define CMCI_UCNA_LLC 0x2 >>> +/* Intel: Error Types */ >>> +#define INTEL_MCE_SRAO_MEM 0x0 >>> +#define INTEL_MCE_SRAO_LLC 0x1 >>> +#define INTEL_CMCI_UCNA_LLC 0x2 >>> + >>> +/* AMD: Memory Error */ >>> +#define MCG_STATUS_MEM_VAL 0x5 >>> +#define MCE_MEM_BANK 0x4 >>> +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL >>> +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL >> >> Hmm, drop this test code please. > > What do you not like? The // ? > > ChristophYes, If you like to keep test code, /* */ seems better -- Just my personal prefer. Thanks, Jinsong> >> >>> +#define MCi_MISC_MEM_VAL 0x0 >>> + >>> +/* AMD: L3 Cache Error */ >>> +#define MCG_STATUS_L3_VAL 0x5 >>> +#define MCE_L3_BANK 0x4 >>> +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL >>> +#define MC4_MISC0_VAL 0x0 >>> +#define MC4_MISC1_VAL 0x0 >>> +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL + >>> +/* AMD: Error Types */ >>> +#define AMD_MCE_MEM 0x0 /* memory error */ >>> +#define AMD_MCE_L3 0x1 /* l3 cache */ >>> >>> #define LOGFILE stdout >>> >>> -int dump; >>> -struct xen_mc_msrinject msr_inj; >>> +static int dump; >>> +static int opt_exception; >>> +static struct xen_mc_msrinject msr_inj; >>> + >>> +#define CPU_VENDOR_UNKNOWN -1 >>> +#define CPU_VENDOR_AMD 0 >>> +#define CPU_VENDOR_INTEL 1 >>> +static int cpu_vendor; >>> + >>> >>> static void Lprintf(const char *fmt, ...) 
>>> { >>> @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle) >>> return 0; } >>> >>> -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) >>> +static int intel_inject_cmci(xc_interface *xc_handle) { >>> struct xen_mc mc; >>> int nr_cpus; >>> @@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type) >>> case MCi_type_MISC: addr = MSR_IA32_MC0_CTL + (bank * >>> 4) + type; break; + case MC4_type_MISC1: >>> + addr = 0xc0000408; >>> + break; >>> + case MC4_type_MISC2: >>> + addr = 0xc0000409; >>> + break; >>> + case MC4_type_MISC3: >>> + addr = 0xc000040a; >>> + break; >>> case MCi_type_CTL2: >>> addr = MSR_IA32_MC0_CTL2 + bank; >>> break; >>> @@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface >>> *xc_handle, } >>> >>> static int inject_mci_misc(xc_interface *xc_handle, >>> - uint32_t cpu_nr, >>> - uint64_t bank, >>> - uint64_t val) >>> + uint32_t cpu_nr, uint32_t misctype, >>> + uint64_t bank, uint64_t val) { >>> return add_msr_bank_intpose(xc_handle, cpu_nr, >>> MC_MSRINJ_F_INTERPOSE, - MCi_type_MISC, bank, val); + >>> MCi_type_MISC + misctype, bank, val); } >>> >>> static int inject_mci_addr(xc_interface *xc_handle, >>> @@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface >>> *xc_handle, MCi_type_ADDR, >>> bank, val); } >>> >>> -static int inject_llc_srao(xc_interface *xc_handle, >>> - uint32_t cpu_nr, >>> - uint32_t domain, >>> - uint64_t gaddr) >>> +static int intel_inject_llc_srao(xc_interface *xc_handle, >>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>> uint64_t gpfn, mfn, haddr; >>> int ret = 0; >>> @@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface >>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>> MCi_STATUS MSR\n"); >>> >>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>> MCE_SRAO_LLC_BANK, >>> MCi_MISC_SRAO_LLC_VAL); if ( ret ) err(xc_handle, >>> "Failed to inject MCi_MISC MSR\n"); @@ -407,17 +447,18 @@ static >>> int 
inject_llc_srao(xc_interface *xc_handle, ret >>> flush_msr_inj(xc_handle); if ( ret ) >>> err(xc_handle, "Failed to inject MSR\n"); >>> - ret = inject_mce(xc_handle, cpu_nr); >>> - if ( ret ) >>> - err(xc_handle, "Failed to inject MCE error\n"); + >>> + if (opt_exception) { >>> + ret = inject_mce(xc_handle, cpu_nr); >>> + if ( ret ) >>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>> >>> return 0; >>> } >>> >>> -static int inject_mem_srao(xc_interface *xc_handle, >>> - uint32_t cpu_nr, >>> - uint32_t domain, >>> - uint64_t gaddr) >>> +static int intel_inject_mem_srao(xc_interface *xc_handle, >>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>> uint64_t gpfn, mfn, haddr; >>> int ret = 0; >>> @@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface >>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>> MCi_STATUS MSR\n"); >>> >>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>> MCE_SRAO_MEM_BANK, >>> MCi_MISC_SRAO_MEM_VAL); if ( ret ) err(xc_handle, >>> "Failed to inject MCi_MISC MSR\n"); @@ -448,17 +489,18 @@ static >>> int inject_mem_srao(xc_interface *xc_handle, ret >>> flush_msr_inj(xc_handle); if ( ret ) >>> err(xc_handle, "Failed to inject MSR\n"); >>> - ret = inject_mce(xc_handle, cpu_nr); >>> - if ( ret ) >>> - err(xc_handle, "Failed to inject MCE error\n"); + >>> + if (opt_exception) { >>> + ret = inject_mce(xc_handle, cpu_nr); >>> + if ( ret ) >>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>> >>> return 0; >>> } >>> >>> -static int inject_llc_ucna(xc_interface *xc_handle, >>> - uint32_t cpu_nr, >>> - uint32_t domain, >>> - uint64_t gaddr) >>> +static int intel_inject_llc_ucna(xc_interface *xc_handle, >>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>> uint64_t gpfn, mfn, haddr; >>> int ret = 0; >>> @@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface >>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>> MCi_STATUS MSR\n"); >>> >>> - ret = 
inject_mci_misc(xc_handle, cpu_nr, >>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>> CMCI_UCNA_LLC_BANK, >>> MCi_MISC_UCNA_LLC_VAL); if ( ret ) err(xc_handle, >>> "Failed to inject MCi_MISC MSR\n"); @@ -489,13 +531,108 @@ static >>> int inject_llc_ucna(xc_interface *xc_handle, ret >>> flush_msr_inj(xc_handle); if ( ret ) >>> err(xc_handle, "Failed to inject MSR\n"); >>> - ret = inject_cmci(xc_handle, cpu_nr); >>> + ret = intel_inject_cmci(xc_handle); >>> if ( ret ) >>> err(xc_handle, "Failed to inject MCE error\n"); >>> >>> return 0; >>> } >>> >>> +static int amd_inject_mem(xc_interface *xc_handle, >>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ >>> + uint64_t gpfn, mfn, haddr; >>> + int ret = 0; >>> + >>> + ret = inject_mcg_status(xc_handle, cpu_nr, >>> MCG_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, >>> "Failed to inject MCG_STATUS MSR\n"); + + ret >>> inject_mci_status(xc_handle, cpu_nr, + >>> MCE_MEM_BANK, MCi_STATUS_MEM_VAL); + if ( ret ) + >>> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret >>> inject_mci_misc(xc_handle, cpu_nr, 0, + >>> MCE_MEM_BANK, MCi_MISC_MEM_VAL); + if ( ret ) + >>> err(xc_handle, "Failed to inject MCi_MISC MSR\n"); + + gpfn >>> gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, >>> gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN >>> is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & >>> (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, >>> MCE_MEM_BANK, haddr); + if ( ret ) + err(xc_handle, >>> "Failed to inject MCi_ADDR MSR\n"); + + ret >>> flush_msr_inj(xc_handle); + if ( ret ) >>> + err(xc_handle, "Failed to inject MSR\n"); >>> + >>> + if (opt_exception) { >>> + ret = inject_mce(xc_handle, cpu_nr); >>> + if ( ret ) >>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>> + >>> + return 0; >>> +} >>> + >>> +static int amd_inject_l3(xc_interface *xc_handle, >>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ >>> + uint64_t gpfn, mfn, haddr; >>> + int 
ret = 0; >>> + >>> + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL); >>> + if ( ret ) + err(xc_handle, "Failed to inject >>> MCG_STATUS MSR\n"); + + ret = inject_mci_status(xc_handle, >>> cpu_nr, + MCE_L3_BANK, >>> MCi_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, "Failed >>> to inject MCi_STATUS MSR\n"); + + ret >>> inject_mci_misc(xc_handle, cpu_nr, 0, + >>> MCE_L3_BANK, MC4_MISC0_VAL); + if ( ret ) + >>> err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); + + ret >>> inject_mci_misc(xc_handle, cpu_nr, 1, + >>> MCE_L3_BANK, MC4_MISC1_VAL); + if ( ret ) + >>> err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); + + ret >>> inject_mci_misc(xc_handle, cpu_nr, 2, + >>> MCE_L3_BANK, MC4_MISC2_L3_VAL); + if ( ret ) + >>> err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); + + gpfn >>> gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, >>> gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN >>> is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & >>> (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, >>> MCE_L3_BANK, haddr); + if ( ret ) + err(xc_handle, >>> "Failed to inject MCi_ADDR MSR\n"); + + ret >>> flush_msr_inj(xc_handle); + if ( ret ) >>> + err(xc_handle, "Failed to inject MSR\n"); >>> + >>> + if (opt_exception) { >>> + ret = inject_mce(xc_handle, cpu_nr); >>> + if ( ret ) >>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>> + >>> + return 0; >>> +} >>> + >>> + >>> static long xs_get_dom_mem(int domid) >>> { >>> char path[128]; >>> @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) if >>> (!xs) return -1; >>> >>> - sprintf(path, "/local/domain/%d/memory/target", domid); >>> + snprintf(path, sizeof(path), "/local/domain/%d/memory/target", >>> domid); memstr = xs_read(xs, XBT_NULL, path, &plen); >>> xs_daemon_close(xs); >>> >>> @@ -540,30 +677,109 @@ static void help(void) >>> " -D, --dump dump addr info without error >>> injection\n" " -c, --cpu=CPU_ID target CPU\n" >>> " -d, --domain=DomID target 
domain, the default is >>> Xen itself\n" + " -e raise MCE >>> exception\n" " -h, --help print this page\n" >>> " -p, --phyaddr physical address\n" >>> - " -t, --type=error error type\n" >>> - " 0 : MCE_SRAO_MEM\n" >>> - " 1 : MCE_SRAO_LLC\n" >>> - " 2 : CMCI_UCNA_LLC\n" >>> - "\n" >>> ); >>> + >>> + if (cpu_vendor == CPU_VENDOR_INTEL) { >>> + printf( >>> + " -t, --type=error error type\n" >>> + " 0x0 : SRAO MEM\n" >>> + " 0x1 : SRAO LLC\n" >>> + " 0x2 : CMCI UCNA LLC\n"); + >>> } + if (cpu_vendor == CPU_VENDOR_AMD) { >>> + printf( >>> + " -t, --type=error error type\n" >>> + " 0x0: DRAM error\n" >>> + " 0x1: L3 cache error\n"); + >>> } + printf("\n"); >>> +} >>> + >>> +static void cpuid(const unsigned int *input, unsigned int regs[4]) >>> +{ + unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? >>> 0 : input[1]; +#ifdef __i386__ >>> + /* Use the stack to avoid reg constraint failures with some >>> gcc flags */ + asm ( >>> + "push %%ebx; push %%edx\n\t" >>> + "cpuid\n\t" >>> + "mov %%ebx,4(%4)\n\t" >>> + "mov %%edx,12(%4)\n\t" >>> + "pop %%edx; pop %%ebx\n\t" >>> + : "=a" (regs[0]), "=c" (regs[2]) >>> + : "0" (input[0]), "1" (count), "S" (regs) >>> + : "memory" ); >>> +#else >>> + asm ( >>> + "cpuid" >>> + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" >>> (regs[3]) + : "0" (input[0]), "2" (count) ); +#endif >>> +} >>> + >>> +/* Get the manufacturer brand name of the host processor. 
*/ >>> +static void cpuid_brand_get(char *str, size_t len) +{ >>> + unsigned int input[2] = { 0, 0 }; >>> + union { >>> + unsigned int regs[4]; >>> + struct { >>> + char eax[4]; >>> + char ebx[4]; >>> + char ecx[4]; >>> + char edx[4]; >>> + } str_regs; >>> + } cpu_branding; >>> + >>> + cpuid(input, cpu_branding.regs); >>> + >>> + snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c", >>> + cpu_branding.str_regs.ebx[0], cpu_branding.str_regs.ebx[1], >>> + cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3], >>> + cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1], >>> + cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3], >>> + cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1], >>> + cpu_branding.str_regs.ecx[2], >>> cpu_branding.str_regs.ecx[3]); } >>> >>> int main(int argc, char *argv[]) >>> { >>> - int type = MCE_SRAO_MEM; >>> + int type; >>> int c, opt_index; >>> uint32_t domid; >>> xc_interface *xc_handle; >>> - int cpu_nr; >>> - int64_t gaddr, gpfn, mfn, haddr, max_gpa; >>> + unsigned int cpu_nr; >>> + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; >>> + char cpu_brand[13]; >>> >>> /* Default Value */ >>> domid = DOMID_XEN; >>> gaddr = 0x180020; >>> cpu_nr = 0; >>> >>> + cpu_vendor = CPU_VENDOR_UNKNOWN; >>> + cpuid_brand_get(cpu_brand, sizeof(cpu_brand)); >>> + if (strstr(cpu_brand, "AMD")) >>> + cpu_vendor = CPU_VENDOR_AMD; >>> + if (strstr(cpu_brand, "Intel")) >>> + cpu_vendor = CPU_VENDOR_INTEL; >>> + >>> + switch (cpu_vendor) { >>> + case CPU_VENDOR_AMD: >>> + type = AMD_MCE_MEM; >>> + break; >>> + case CPU_VENDOR_INTEL: >>> + type = INTEL_MCE_SRAO_MEM; >>> + break; >>> + case CPU_VENDOR_UNKNOWN: >>> + default: >>> + Lprintf("Unknown cpu vendor on this machine\n"); + >>> exit(EXIT_FAILURE); + } >>> + >>> init_msr_inj(); >>> xc_handle = xc_interface_open(0, 0, 0); >>> if ( !xc_handle ) { >>> @@ -571,8 +787,8 @@ int main(int argc, char *argv[]) >>> exit(EXIT_FAILURE); } >>> >>> - while ( 1 ) { >>> - c = getopt_long(argc, argv, 
"c:Dd:t:hp:r", opts, >>> &opt_index); + for (;;) { + c = getopt_long(argc, argv, >>> "c:Dd:t:hp:r:e", opts, &opt_index); if ( c == -1 ) >>> break; >>> switch ( c ) { >>> @@ -580,23 +796,26 @@ int main(int argc, char *argv[]) >>> dump=1; break; >>> case ''c'': >>> - cpu_nr = strtol(optarg, &optarg, 10); >>> + cpu_nr = strtoul(optarg, &optarg, 0); >>> if ( strlen(optarg) != 0 ) >>> err(xc_handle, "Please input a digit parameter for >>> CPU\n"); break; case ''d'': >>> - domid = strtol(optarg, &optarg, 10); >>> + domid = strtoul(optarg, &optarg, 0); >>> if ( strlen(optarg) != 0 ) >>> err(xc_handle, "Please input a digit parameter for >>> domain\n"); break; >>> case ''p'': >>> - gaddr = strtol(optarg, &optarg, 0); >>> + gaddr = strtoul(optarg, &optarg, 0); >>> if ( strlen(optarg) != 0 ) >>> err(xc_handle, "Please input correct page >>> address\n"); break; >>> case ''t'': >>> type = strtol(optarg, NULL, 0); >>> break; >>> + case ''e'': >>> + opt_exception = 1; >>> + break; >>> case ''h'': >>> default: >>> help(); >>> @@ -627,19 +846,36 @@ int main(int argc, char *argv[]) >>> goto out; } >>> >>> - switch ( type ) >>> - { >>> - case MCE_SRAO_MEM: >>> - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); >>> - break; >>> - case MCE_SRAO_LLC: >>> - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); >>> - break; >>> - case CMCI_UCNA_LLC: >>> - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + >>> switch ( cpu_vendor ) { + case CPU_VENDOR_INTEL: >>> + switch ( type ) { >>> + case INTEL_MCE_SRAO_MEM: >>> + intel_inject_mem_srao(xc_handle, cpu_nr, domid, >>> gaddr); + break; + case INTEL_MCE_SRAO_LLC: >>> + intel_inject_llc_srao(xc_handle, cpu_nr, domid, >>> gaddr); + break; + case INTEL_CMCI_UCNA_LLC: >>> + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, >>> gaddr); + break; + default: >>> + err(xc_handle, "Unsupported error type\n"); + >>> break; + } >>> break; >>> - default: >>> - err(xc_handle, "Unsupported error type\n"); + >>> + case CPU_VENDOR_AMD: >>> + switch ( type ) { 
>>> + case AMD_MCE_MEM: >>> + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); + >>> break; + case AMD_MCE_L3: >>> + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); + >>> break; + default: >>> + err(xc_handle, "Unsupported error type\n"); + >>> break; + } >>> break; >>> }
On 31.05.13 16:35, Liu, Jinsong wrote:> Christoph Egger wrote: >> On 31.05.13 13:31, Liu, Jinsong wrote: >>> Seems it cannot patch to latest tree, so please rebase (I agree it''s >>> our fault not response in time ... sorry). >>> Some comments below. >>> >>> Thanks, >>> Jinsong >>> >>> Egger, Christoph wrote: >>>> a8206654c64f:patches chegger$ cat xen_mceinj.diff >>>> commit a277555e158c87aed34196f72eba0a4cf8f0fb38 >>>> Author: Christoph Egger <chegger@amazon.de> >>>> Date: Wed Feb 27 14:52:19 2013 +0000 >>>> >>>> xen-mceinj: Support AMD. Add -e option. >>>> >>>> Add support for AMD. >>> >>> It''s pretty OK for this purpose, but ... >>> >>>> Add -e option to raise an exception. >>> >>> ... why need -e option w/ opt_exception variable? >>> AFAICT, it works fine w/o -e. Please add more comments >>> for strong reason, especially it need force user to change command >>> line. >> >> Without -e this allows you to test the polling handler and -e this >> allows you to test the exception handler. > > If for new polling purpose, isn''t it more reasonable to add > another explicit option for polling?What do you suggest? -p is already in use for the physical address.> In this way we keep same > style/interface with old one, defaultly test mce.You sound you have scripts that require an adjustment when the default behaviour changes, right?> What is more, please add description (especially the --help) > for polling purpose -- I didn''t find it from code review or fromdescription. Ok, I clarify the --help message. Christoph>> >>>> >>>> Signed-off-by: Christoph Egger <chegger@amazon.de> >>>> >>>> diff --git a/tools/tests/mce-test/tools/xen-mceinj.c >>>> b/tools/tests/mce-test/tools/xen-mceinj.c >>>> index e3e62f7..7400a02 100644 >>>> --- a/tools/tests/mce-test/tools/xen-mceinj.c >>>> +++ b/tools/tests/mce-test/tools/xen-mceinj.c >>>> @@ -1,6 +1,8 @@ >>>> /* >>>> * xen-mceinj.c: utilities to inject fake MCE for x86. >>>> * Copyright (c) 2010, Intel Corporation. 
>>>> + * Copyright (c) 2012, AMD Cooperation Inc. >>>> + * Copyright (c) 2013, Amazon.com, Inc. or its affiliates. * >>>> * This program is free software; you can redistribute it and/or >>>> modify it >>>> * under the terms and conditions of the GNU General Public >>>> License, @@ -18,6 +20,7 @@ >>>> * Authors: Yunhong Jiang <yunhong.jiang@intel.com> >>>> * Haicheng Li <haicheng.li@intel.com> >>>> * Xudong Hao <xudong.hao@intel.com> >>>> + * Christoph Egger <chegger@amazon.de> >>>> */ >>>> >>>> >>>> @@ -44,11 +47,14 @@ >>>> #define MCi_type_STATUS 0x1 >>>> #define MCi_type_ADDR 0x2 >>>> #define MCi_type_MISC 0x3 >>>> -#define MCi_type_CTL2 0x4 >>>> +#define MC4_type_MISC1 0x4 >>>> +#define MC4_type_MISC2 0x5 >>>> +#define MC4_type_MISC3 0x6 >>>> +#define MCi_type_CTL2 0x7 >>> >>> Why change original sequence? I didn''t see the necessity of doing so >>> --> please add new types behind old ones so that we don''t need test >>> old logic again. >> >> See Jan''s comment. > > Agree. > >> >>>> >>>> #define INVALID_MSR ~0UL >>>> >>>> -/* Intel MSRs */ >>>> +/* X86 machine check MSRs */ >>>> #define MSR_IA32_MCG_CAP 0x00000179 >>>> #define MSR_IA32_MCG_STATUS 0x0000017a >>>> #define MSR_IA32_MCG_CTL 0x0000017b >>>> @@ -56,35 +62,63 @@ >>>> #define MSR_IA32_MC0_STATUS 0x00000401 >>>> #define MSR_IA32_MC0_ADDR 0x00000402 >>>> #define MSR_IA32_MC0_MISC 0x00000403 >>>> + >>>> +/* Intel MSRs */ >>>> #define MSR_IA32_MC0_CTL2 0x00000280 >>>> >>>> -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ >>>> +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE >>>> */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 >>>> #define MCE_SRAO_LLC_BANK 0x7 >>>> #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL >>>> #define MCi_MISC_SRAO_LLC_VAL 0x86UL >>>> >>>> -/* Memory Patrol Scrub SRAO MCE */ >>>> +/* Intel: Memory Patrol Scrub SRAO MCE */ >>>> #define MCG_STATUS_SRAO_MEM_VAL 0x5 >>>> #define MCE_SRAO_MEM_BANK 0x8 >>>> #define MCi_STATUS_SRAO_MEM_VAL 
0xBD000000004000CFUL >>>> #define MCi_MISC_SRAO_MEM_VAL 0x86UL >>>> >>>> -/* LLC EWB UCNA Error */ >>>> +/* Intel: LLC EWB UCNA Error */ >>>> #define MCG_STATUS_UCNA_LLC_VAL 0x0 >>>> #define CMCI_UCNA_LLC_BANK 0x9 >>>> #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL >>>> #define MCi_MISC_UCNA_LLC_VAL 0x86UL >>>> >>>> -/* Error Types */ >>>> -#define MCE_SRAO_MEM 0x0 >>>> -#define MCE_SRAO_LLC 0x1 >>>> -#define CMCI_UCNA_LLC 0x2 >>>> +/* Intel: Error Types */ >>>> +#define INTEL_MCE_SRAO_MEM 0x0 >>>> +#define INTEL_MCE_SRAO_LLC 0x1 >>>> +#define INTEL_CMCI_UCNA_LLC 0x2 >>>> + >>>> +/* AMD: Memory Error */ >>>> +#define MCG_STATUS_MEM_VAL 0x5 >>>> +#define MCE_MEM_BANK 0x4 >>>> +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL >>>> +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL >>> >>> Hmm, drop this test code please. >> >> What do you not like? The // ? >> >> Christoph > > Yes, If you like to keep test code, /* */ seems better -- Just my personal prefer. > > Thanks, > Jinsong > >> >>> >>>> +#define MCi_MISC_MEM_VAL 0x0 >>>> + >>>> +/* AMD: L3 Cache Error */ >>>> +#define MCG_STATUS_L3_VAL 0x5 >>>> +#define MCE_L3_BANK 0x4 >>>> +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL >>>> +#define MC4_MISC0_VAL 0x0 >>>> +#define MC4_MISC1_VAL 0x0 >>>> +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL + >>>> +/* AMD: Error Types */ >>>> +#define AMD_MCE_MEM 0x0 /* memory error */ >>>> +#define AMD_MCE_L3 0x1 /* l3 cache */ >>>> >>>> #define LOGFILE stdout >>>> >>>> -int dump; >>>> -struct xen_mc_msrinject msr_inj; >>>> +static int dump; >>>> +static int opt_exception; >>>> +static struct xen_mc_msrinject msr_inj; >>>> + >>>> +#define CPU_VENDOR_UNKNOWN -1 >>>> +#define CPU_VENDOR_AMD 0 >>>> +#define CPU_VENDOR_INTEL 1 >>>> +static int cpu_vendor; >>>> + >>>> >>>> static void Lprintf(const char *fmt, ...) 
>>>> { >>>> @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle) >>>> return 0; } >>>> >>>> -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) >>>> +static int intel_inject_cmci(xc_interface *xc_handle) { >>>> struct xen_mc mc; >>>> int nr_cpus; >>>> @@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type) >>>> case MCi_type_MISC: addr = MSR_IA32_MC0_CTL + (bank * >>>> 4) + type; break; + case MC4_type_MISC1: >>>> + addr = 0xc0000408; >>>> + break; >>>> + case MC4_type_MISC2: >>>> + addr = 0xc0000409; >>>> + break; >>>> + case MC4_type_MISC3: >>>> + addr = 0xc000040a; >>>> + break; >>>> case MCi_type_CTL2: >>>> addr = MSR_IA32_MC0_CTL2 + bank; >>>> break; >>>> @@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface >>>> *xc_handle, } >>>> >>>> static int inject_mci_misc(xc_interface *xc_handle, >>>> - uint32_t cpu_nr, >>>> - uint64_t bank, >>>> - uint64_t val) >>>> + uint32_t cpu_nr, uint32_t misctype, >>>> + uint64_t bank, uint64_t val) { >>>> return add_msr_bank_intpose(xc_handle, cpu_nr, >>>> MC_MSRINJ_F_INTERPOSE, - MCi_type_MISC, bank, val); + >>>> MCi_type_MISC + misctype, bank, val); } >>>> >>>> static int inject_mci_addr(xc_interface *xc_handle, >>>> @@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface >>>> *xc_handle, MCi_type_ADDR, >>>> bank, val); } >>>> >>>> -static int inject_llc_srao(xc_interface *xc_handle, >>>> - uint32_t cpu_nr, >>>> - uint32_t domain, >>>> - uint64_t gaddr) >>>> +static int intel_inject_llc_srao(xc_interface *xc_handle, >>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>>> uint64_t gpfn, mfn, haddr; >>>> int ret = 0; >>>> @@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface >>>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>>> MCi_STATUS MSR\n"); >>>> >>>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>>> MCE_SRAO_LLC_BANK, >>>> MCi_MISC_SRAO_LLC_VAL); if ( ret ) err(xc_handle, >>>> "Failed to inject 
MCi_MISC MSR\n"); @@ -407,17 +447,18 @@ static >>>> int inject_llc_srao(xc_interface *xc_handle, ret >>>> flush_msr_inj(xc_handle); if ( ret ) >>>> err(xc_handle, "Failed to inject MSR\n"); >>>> - ret = inject_mce(xc_handle, cpu_nr); >>>> - if ( ret ) >>>> - err(xc_handle, "Failed to inject MCE error\n"); + >>>> + if (opt_exception) { >>>> + ret = inject_mce(xc_handle, cpu_nr); >>>> + if ( ret ) >>>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>>> >>>> return 0; >>>> } >>>> >>>> -static int inject_mem_srao(xc_interface *xc_handle, >>>> - uint32_t cpu_nr, >>>> - uint32_t domain, >>>> - uint64_t gaddr) >>>> +static int intel_inject_mem_srao(xc_interface *xc_handle, >>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>>> uint64_t gpfn, mfn, haddr; >>>> int ret = 0; >>>> @@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface >>>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>>> MCi_STATUS MSR\n"); >>>> >>>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>>> MCE_SRAO_MEM_BANK, >>>> MCi_MISC_SRAO_MEM_VAL); if ( ret ) err(xc_handle, >>>> "Failed to inject MCi_MISC MSR\n"); @@ -448,17 +489,18 @@ static >>>> int inject_mem_srao(xc_interface *xc_handle, ret >>>> flush_msr_inj(xc_handle); if ( ret ) >>>> err(xc_handle, "Failed to inject MSR\n"); >>>> - ret = inject_mce(xc_handle, cpu_nr); >>>> - if ( ret ) >>>> - err(xc_handle, "Failed to inject MCE error\n"); + >>>> + if (opt_exception) { >>>> + ret = inject_mce(xc_handle, cpu_nr); >>>> + if ( ret ) >>>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>>> >>>> return 0; >>>> } >>>> >>>> -static int inject_llc_ucna(xc_interface *xc_handle, >>>> - uint32_t cpu_nr, >>>> - uint32_t domain, >>>> - uint64_t gaddr) >>>> +static int intel_inject_llc_ucna(xc_interface *xc_handle, >>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>>> uint64_t gpfn, mfn, haddr; >>>> int ret = 0; >>>> @@ -472,7 +514,7 @@ static int 
inject_llc_ucna(xc_interface >>>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>>> MCi_STATUS MSR\n"); >>>> >>>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>>> CMCI_UCNA_LLC_BANK, >>>> MCi_MISC_UCNA_LLC_VAL); if ( ret ) err(xc_handle, >>>> "Failed to inject MCi_MISC MSR\n"); @@ -489,13 +531,108 @@ static >>>> int inject_llc_ucna(xc_interface *xc_handle, ret >>>> flush_msr_inj(xc_handle); if ( ret ) >>>> err(xc_handle, "Failed to inject MSR\n"); >>>> - ret = inject_cmci(xc_handle, cpu_nr); >>>> + ret = intel_inject_cmci(xc_handle); >>>> if ( ret ) >>>> err(xc_handle, "Failed to inject MCE error\n"); >>>> >>>> return 0; >>>> } >>>> >>>> +static int amd_inject_mem(xc_interface *xc_handle, >>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ >>>> + uint64_t gpfn, mfn, haddr; >>>> + int ret = 0; >>>> + >>>> + ret = inject_mcg_status(xc_handle, cpu_nr, >>>> MCG_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, >>>> "Failed to inject MCG_STATUS MSR\n"); + + ret >>>> inject_mci_status(xc_handle, cpu_nr, + >>>> MCE_MEM_BANK, MCi_STATUS_MEM_VAL); + if ( ret ) + >>>> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret >>>> inject_mci_misc(xc_handle, cpu_nr, 0, + >>>> MCE_MEM_BANK, MCi_MISC_MEM_VAL); + if ( ret ) + >>>> err(xc_handle, "Failed to inject MCi_MISC MSR\n"); + + gpfn >>>> gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, >>>> gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN >>>> is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & >>>> (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, >>>> MCE_MEM_BANK, haddr); + if ( ret ) + err(xc_handle, >>>> "Failed to inject MCi_ADDR MSR\n"); + + ret >>>> flush_msr_inj(xc_handle); + if ( ret ) >>>> + err(xc_handle, "Failed to inject MSR\n"); >>>> + >>>> + if (opt_exception) { >>>> + ret = inject_mce(xc_handle, cpu_nr); >>>> + if ( ret ) >>>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>>> + >>>> + 
return 0; >>>> +} >>>> + >>>> +static int amd_inject_l3(xc_interface *xc_handle, >>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ >>>> + uint64_t gpfn, mfn, haddr; >>>> + int ret = 0; >>>> + >>>> + ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL); >>>> + if ( ret ) + err(xc_handle, "Failed to inject >>>> MCG_STATUS MSR\n"); + + ret = inject_mci_status(xc_handle, >>>> cpu_nr, + MCE_L3_BANK, >>>> MCi_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, "Failed >>>> to inject MCi_STATUS MSR\n"); + + ret >>>> inject_mci_misc(xc_handle, cpu_nr, 0, + >>>> MCE_L3_BANK, MC4_MISC0_VAL); + if ( ret ) + >>>> err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); + + ret >>>> inject_mci_misc(xc_handle, cpu_nr, 1, + >>>> MCE_L3_BANK, MC4_MISC1_VAL); + if ( ret ) + >>>> err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); + + ret >>>> inject_mci_misc(xc_handle, cpu_nr, 2, + >>>> MCE_L3_BANK, MC4_MISC2_L3_VAL); + if ( ret ) + >>>> err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); + + gpfn >>>> gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, >>>> gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN >>>> is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & >>>> (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, >>>> MCE_L3_BANK, haddr); + if ( ret ) + err(xc_handle, >>>> "Failed to inject MCi_ADDR MSR\n"); + + ret >>>> flush_msr_inj(xc_handle); + if ( ret ) >>>> + err(xc_handle, "Failed to inject MSR\n"); >>>> + >>>> + if (opt_exception) { >>>> + ret = inject_mce(xc_handle, cpu_nr); >>>> + if ( ret ) >>>> + err(xc_handle, "Failed to inject MCE error\n"); + } >>>> + >>>> + return 0; >>>> +} >>>> + >>>> + >>>> static long xs_get_dom_mem(int domid) >>>> { >>>> char path[128]; >>>> @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) if >>>> (!xs) return -1; >>>> >>>> - sprintf(path, "/local/domain/%d/memory/target", domid); >>>> + snprintf(path, sizeof(path), "/local/domain/%d/memory/target", >>>> domid); memstr = xs_read(xs, 
XBT_NULL, path, &plen); >>>> xs_daemon_close(xs); >>>> >>>> @@ -540,30 +677,109 @@ static void help(void) >>>> " -D, --dump dump addr info without error >>>> injection\n" " -c, --cpu=CPU_ID target CPU\n" >>>> " -d, --domain=DomID target domain, the default is >>>> Xen itself\n" + " -e raise MCE >>>> exception\n" " -h, --help print this page\n" >>>> " -p, --phyaddr physical address\n" >>>> - " -t, --type=error error type\n" >>>> - " 0 : MCE_SRAO_MEM\n" >>>> - " 1 : MCE_SRAO_LLC\n" >>>> - " 2 : CMCI_UCNA_LLC\n" >>>> - "\n" >>>> ); >>>> + >>>> + if (cpu_vendor == CPU_VENDOR_INTEL) { >>>> + printf( >>>> + " -t, --type=error error type\n" >>>> + " 0x0 : SRAO MEM\n" >>>> + " 0x1 : SRAO LLC\n" >>>> + " 0x2 : CMCI UCNA LLC\n"); + >>>> } + if (cpu_vendor == CPU_VENDOR_AMD) { >>>> + printf( >>>> + " -t, --type=error error type\n" >>>> + " 0x0: DRAM error\n" >>>> + " 0x1: L3 cache error\n"); + >>>> } + printf("\n"); >>>> +} >>>> + >>>> +static void cpuid(const unsigned int *input, unsigned int regs[4]) >>>> +{ + unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? >>>> 0 : input[1]; +#ifdef __i386__ >>>> + /* Use the stack to avoid reg constraint failures with some >>>> gcc flags */ + asm ( >>>> + "push %%ebx; push %%edx\n\t" >>>> + "cpuid\n\t" >>>> + "mov %%ebx,4(%4)\n\t" >>>> + "mov %%edx,12(%4)\n\t" >>>> + "pop %%edx; pop %%ebx\n\t" >>>> + : "=a" (regs[0]), "=c" (regs[2]) >>>> + : "0" (input[0]), "1" (count), "S" (regs) >>>> + : "memory" ); >>>> +#else >>>> + asm ( >>>> + "cpuid" >>>> + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" >>>> (regs[3]) + : "0" (input[0]), "2" (count) ); +#endif >>>> +} >>>> + >>>> +/* Get the manufacturer brand name of the host processor. 
*/ >>>> +static void cpuid_brand_get(char *str, size_t len) +{ >>>> + unsigned int input[2] = { 0, 0 }; >>>> + union { >>>> + unsigned int regs[4]; >>>> + struct { >>>> + char eax[4]; >>>> + char ebx[4]; >>>> + char ecx[4]; >>>> + char edx[4]; >>>> + } str_regs; >>>> + } cpu_branding; >>>> + >>>> + cpuid(input, cpu_branding.regs); >>>> + >>>> + snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c", >>>> + cpu_branding.str_regs.ebx[0], cpu_branding.str_regs.ebx[1], >>>> + cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3], >>>> + cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1], >>>> + cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3], >>>> + cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1], >>>> + cpu_branding.str_regs.ecx[2], >>>> cpu_branding.str_regs.ecx[3]); } >>>> >>>> int main(int argc, char *argv[]) >>>> { >>>> - int type = MCE_SRAO_MEM; >>>> + int type; >>>> int c, opt_index; >>>> uint32_t domid; >>>> xc_interface *xc_handle; >>>> - int cpu_nr; >>>> - int64_t gaddr, gpfn, mfn, haddr, max_gpa; >>>> + unsigned int cpu_nr; >>>> + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; >>>> + char cpu_brand[13]; >>>> >>>> /* Default Value */ >>>> domid = DOMID_XEN; >>>> gaddr = 0x180020; >>>> cpu_nr = 0; >>>> >>>> + cpu_vendor = CPU_VENDOR_UNKNOWN; >>>> + cpuid_brand_get(cpu_brand, sizeof(cpu_brand)); >>>> + if (strstr(cpu_brand, "AMD")) >>>> + cpu_vendor = CPU_VENDOR_AMD; >>>> + if (strstr(cpu_brand, "Intel")) >>>> + cpu_vendor = CPU_VENDOR_INTEL; >>>> + >>>> + switch (cpu_vendor) { >>>> + case CPU_VENDOR_AMD: >>>> + type = AMD_MCE_MEM; >>>> + break; >>>> + case CPU_VENDOR_INTEL: >>>> + type = INTEL_MCE_SRAO_MEM; >>>> + break; >>>> + case CPU_VENDOR_UNKNOWN: >>>> + default: >>>> + Lprintf("Unknown cpu vendor on this machine\n"); + >>>> exit(EXIT_FAILURE); + } >>>> + >>>> init_msr_inj(); >>>> xc_handle = xc_interface_open(0, 0, 0); >>>> if ( !xc_handle ) { >>>> @@ -571,8 +787,8 @@ int main(int argc, char *argv[]) >>>> exit(EXIT_FAILURE); } >>>> 
>>>> - while ( 1 ) { >>>> - c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, >>>> &opt_index); + for (;;) { + c = getopt_long(argc, argv, >>>> "c:Dd:t:hp:r:e", opts, &opt_index); if ( c == -1 ) >>>> break; >>>> switch ( c ) { >>>> @@ -580,23 +796,26 @@ int main(int argc, char *argv[]) >>>> dump=1; break; >>>> case ''c'': >>>> - cpu_nr = strtol(optarg, &optarg, 10); >>>> + cpu_nr = strtoul(optarg, &optarg, 0); >>>> if ( strlen(optarg) != 0 ) >>>> err(xc_handle, "Please input a digit parameter for >>>> CPU\n"); break; case ''d'': >>>> - domid = strtol(optarg, &optarg, 10); >>>> + domid = strtoul(optarg, &optarg, 0); >>>> if ( strlen(optarg) != 0 ) >>>> err(xc_handle, "Please input a digit parameter for >>>> domain\n"); break; >>>> case ''p'': >>>> - gaddr = strtol(optarg, &optarg, 0); >>>> + gaddr = strtoul(optarg, &optarg, 0); >>>> if ( strlen(optarg) != 0 ) >>>> err(xc_handle, "Please input correct page >>>> address\n"); break; >>>> case ''t'': >>>> type = strtol(optarg, NULL, 0); >>>> break; >>>> + case ''e'': >>>> + opt_exception = 1; >>>> + break; >>>> case ''h'': >>>> default: >>>> help(); >>>> @@ -627,19 +846,36 @@ int main(int argc, char *argv[]) >>>> goto out; } >>>> >>>> - switch ( type ) >>>> - { >>>> - case MCE_SRAO_MEM: >>>> - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); >>>> - break; >>>> - case MCE_SRAO_LLC: >>>> - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); >>>> - break; >>>> - case CMCI_UCNA_LLC: >>>> - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + >>>> switch ( cpu_vendor ) { + case CPU_VENDOR_INTEL: >>>> + switch ( type ) { >>>> + case INTEL_MCE_SRAO_MEM: >>>> + intel_inject_mem_srao(xc_handle, cpu_nr, domid, >>>> gaddr); + break; + case INTEL_MCE_SRAO_LLC: >>>> + intel_inject_llc_srao(xc_handle, cpu_nr, domid, >>>> gaddr); + break; + case INTEL_CMCI_UCNA_LLC: >>>> + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, >>>> gaddr); + break; + default: >>>> + err(xc_handle, "Unsupported error type\n"); + >>>> break; + } >>>> break; >>>> 
- default: >>>> - err(xc_handle, "Unsupported error type\n"); + >>>> + case CPU_VENDOR_AMD: >>>> + switch ( type ) { >>>> + case AMD_MCE_MEM: >>>> + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); + >>>> break; + case AMD_MCE_L3: >>>> + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); + >>>> break; + default: >>>> + err(xc_handle, "Unsupported error type\n"); + >>>> break; + } >>>> break; >>>> } >
Egger, Christoph wrote: > On 31.05.13 16:35, Liu, Jinsong wrote: >> Christoph Egger wrote: >>> On 31.05.13 13:31, Liu, Jinsong wrote: >>>> Seems it cannot patch to latest tree, so please rebase (I agree >>>> it's our fault not response in time ... sorry). >>>> Some comments below. >>>> >>>> Thanks, >>>> Jinsong >>>> >>>> Egger, Christoph wrote: >>>>> a8206654c64f:patches chegger$ cat xen_mceinj.diff >>>>> commit a277555e158c87aed34196f72eba0a4cf8f0fb38 >>>>> Author: Christoph Egger <chegger@amazon.de> >>>>> Date: Wed Feb 27 14:52:19 2013 +0000 >>>>> >>>>> xen-mceinj: Support AMD. Add -e option. >>>>> >>>>> Add support for AMD. >>>> >>>> It's pretty OK for this purpose, but ... >>>> >>>>> Add -e option to raise an exception. >>>> >>>> ... why need -e option w/ opt_exception variable? >>>> AFAICT, it works fine w/o -e. Please add more comments >>>> for strong reason, especially it need force user to change command >>>> line. >>> >>> Without -e this allows you to test the polling handler and -e this >>> allows you to test the exception handler. >> >> If for new polling purpose, isn't it more reasonable to add >> another explicit option for polling? > > What do you suggest? -p is already in use for the physical address. Say, -P; considering how rarely the polling case is tested, it's OK. > >> In this way we keep same >> style/interface with old one, defaultly test mce. > > You sound you have scripts that require an adjustment when the > default behaviour changes, right? Our QA uses scripts for the nightly test (though it's not difficult to change the scripts). As a matter of common usage, the default option should be for the most common case -- we always use this tool to test the MCE case. It's just weird to me that the user has to change the interface and add a special option for the most common test case. But anyway, if you don't want to change your approach, I am not strongly against it, as long as Jan agrees to it. 
Thanks, Jinsong> >> What is more, please add description (especially the --help) >> for polling purpose -- I didn''t find it from code review or from >> description. > > Ok, I clarify the --help message. > > Christoph > >>> >>>>> >>>>> Signed-off-by: Christoph Egger <chegger@amazon.de> >>>>> >>>>> diff --git a/tools/tests/mce-test/tools/xen-mceinj.c >>>>> b/tools/tests/mce-test/tools/xen-mceinj.c >>>>> index e3e62f7..7400a02 100644 >>>>> --- a/tools/tests/mce-test/tools/xen-mceinj.c >>>>> +++ b/tools/tests/mce-test/tools/xen-mceinj.c >>>>> @@ -1,6 +1,8 @@ >>>>> /* >>>>> * xen-mceinj.c: utilities to inject fake MCE for x86. >>>>> * Copyright (c) 2010, Intel Corporation. >>>>> + * Copyright (c) 2012, AMD Cooperation Inc. >>>>> + * Copyright (c) 2013, Amazon.com, Inc. or its affiliates. * >>>>> * This program is free software; you can redistribute it and/or >>>>> modify it >>>>> * under the terms and conditions of the GNU General Public >>>>> License, @@ -18,6 +20,7 @@ >>>>> * Authors: Yunhong Jiang <yunhong.jiang@intel.com> >>>>> * Haicheng Li <haicheng.li@intel.com> >>>>> * Xudong Hao <xudong.hao@intel.com> >>>>> + * Christoph Egger <chegger@amazon.de> >>>>> */ >>>>> >>>>> >>>>> @@ -44,11 +47,14 @@ >>>>> #define MCi_type_STATUS 0x1 >>>>> #define MCi_type_ADDR 0x2 >>>>> #define MCi_type_MISC 0x3 >>>>> -#define MCi_type_CTL2 0x4 >>>>> +#define MC4_type_MISC1 0x4 >>>>> +#define MC4_type_MISC2 0x5 >>>>> +#define MC4_type_MISC3 0x6 >>>>> +#define MCi_type_CTL2 0x7 >>>> >>>> Why change original sequence? I didn''t see the necessity of doing >>>> so --> please add new types behind old ones so that we don''t need >>>> test old logic again. >>> >>> See Jan''s comment. >> >> Agree. 
>> >>> >>>>> >>>>> #define INVALID_MSR ~0UL >>>>> >>>>> -/* Intel MSRs */ >>>>> +/* X86 machine check MSRs */ >>>>> #define MSR_IA32_MCG_CAP 0x00000179 >>>>> #define MSR_IA32_MCG_STATUS 0x0000017a >>>>> #define MSR_IA32_MCG_CTL 0x0000017b >>>>> @@ -56,35 +62,63 @@ >>>>> #define MSR_IA32_MC0_STATUS 0x00000401 >>>>> #define MSR_IA32_MC0_ADDR 0x00000402 >>>>> #define MSR_IA32_MC0_MISC 0x00000403 >>>>> + >>>>> +/* Intel MSRs */ >>>>> #define MSR_IA32_MC0_CTL2 0x00000280 >>>>> >>>>> -/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */ >>>>> +/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO >>>>> MCE */ #define MCG_STATUS_SRAO_LLC_VAL 0x5 >>>>> #define MCE_SRAO_LLC_BANK 0x7 >>>>> #define MCi_STATUS_SRAO_LLC_VAL 0xBD2000008000017AUL >>>>> #define MCi_MISC_SRAO_LLC_VAL 0x86UL >>>>> >>>>> -/* Memory Patrol Scrub SRAO MCE */ >>>>> +/* Intel: Memory Patrol Scrub SRAO MCE */ >>>>> #define MCG_STATUS_SRAO_MEM_VAL 0x5 >>>>> #define MCE_SRAO_MEM_BANK 0x8 >>>>> #define MCi_STATUS_SRAO_MEM_VAL 0xBD000000004000CFUL >>>>> #define MCi_MISC_SRAO_MEM_VAL 0x86UL >>>>> >>>>> -/* LLC EWB UCNA Error */ >>>>> +/* Intel: LLC EWB UCNA Error */ >>>>> #define MCG_STATUS_UCNA_LLC_VAL 0x0 >>>>> #define CMCI_UCNA_LLC_BANK 0x9 >>>>> #define MCi_STATUS_UCNA_LLC_VAL 0xBC20000080000136UL >>>>> #define MCi_MISC_UCNA_LLC_VAL 0x86UL >>>>> >>>>> -/* Error Types */ >>>>> -#define MCE_SRAO_MEM 0x0 >>>>> -#define MCE_SRAO_LLC 0x1 >>>>> -#define CMCI_UCNA_LLC 0x2 >>>>> +/* Intel: Error Types */ >>>>> +#define INTEL_MCE_SRAO_MEM 0x0 >>>>> +#define INTEL_MCE_SRAO_LLC 0x1 >>>>> +#define INTEL_CMCI_UCNA_LLC 0x2 >>>>> + >>>>> +/* AMD: Memory Error */ >>>>> +#define MCG_STATUS_MEM_VAL 0x5 >>>>> +#define MCE_MEM_BANK 0x4 >>>>> +#define MCi_STATUS_MEM_VAL 0xb4000000001c0100UL >>>>> +//#define MCi_STATUS_MEM_VAL 0xb600000000000100UL >>>> >>>> Hmm, drop this test code please. >>> >>> What do you not like? The // ? 
>>> >>> Christoph >> >> Yes, If you like to keep test code, /* */ seems better -- Just my >> personal prefer. >> >> Thanks, >> Jinsong >> >>> >>>> >>>>> +#define MCi_MISC_MEM_VAL 0x0 >>>>> + >>>>> +/* AMD: L3 Cache Error */ >>>>> +#define MCG_STATUS_L3_VAL 0x5 >>>>> +#define MCE_L3_BANK 0x4 >>>>> +#define MCi_STATUS_L3_VAL 0xbc000400001c010bULL >>>>> +#define MC4_MISC0_VAL 0x0 >>>>> +#define MC4_MISC1_VAL 0x0 >>>>> +#define MC4_MISC2_L3_VAL 0xc008000000000003ULL + +/* >>>>> AMD: Error Types */ +#define AMD_MCE_MEM 0x0 /* >>>>> memory error */ +#define AMD_MCE_L3 0x1 /* l3 >>>>> cache */ >>>>> >>>>> #define LOGFILE stdout >>>>> >>>>> -int dump; >>>>> -struct xen_mc_msrinject msr_inj; >>>>> +static int dump; >>>>> +static int opt_exception; >>>>> +static struct xen_mc_msrinject msr_inj; >>>>> + >>>>> +#define CPU_VENDOR_UNKNOWN -1 >>>>> +#define CPU_VENDOR_AMD 0 >>>>> +#define CPU_VENDOR_INTEL 1 >>>>> +static int cpu_vendor; >>>>> + >>>>> >>>>> static void Lprintf(const char *fmt, ...) >>>>> { >>>>> @@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface >>>>> *xc_handle) return 0; } >>>>> >>>>> -static int inject_cmci(xc_interface *xc_handle, int cpu_nr) >>>>> +static int intel_inject_cmci(xc_interface *xc_handle) { >>>>> struct xen_mc mc; int nr_cpus; >>>>> @@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type) >>>>> case MCi_type_MISC: addr = MSR_IA32_MC0_CTL + (bank * >>>>> 4) + type; break; + case MC4_type_MISC1: >>>>> + addr = 0xc0000408; >>>>> + break; >>>>> + case MC4_type_MISC2: >>>>> + addr = 0xc0000409; >>>>> + break; >>>>> + case MC4_type_MISC3: >>>>> + addr = 0xc000040a; >>>>> + break; >>>>> case MCi_type_CTL2: >>>>> addr = MSR_IA32_MC0_CTL2 + bank; >>>>> break; >>>>> @@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface >>>>> *xc_handle, } >>>>> >>>>> static int inject_mci_misc(xc_interface *xc_handle, >>>>> - uint32_t cpu_nr, >>>>> - uint64_t bank, >>>>> - uint64_t val) >>>>> + uint32_t cpu_nr, uint32_t misctype, >>>>> + uint64_t 
bank, uint64_t val) { >>>>> return add_msr_bank_intpose(xc_handle, cpu_nr, >>>>> MC_MSRINJ_F_INTERPOSE, - MCi_type_MISC, bank, val); + >>>>> MCi_type_MISC + misctype, bank, val); } >>>>> >>>>> static int inject_mci_addr(xc_interface *xc_handle, >>>>> @@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface >>>>> *xc_handle, MCi_type_ADDR, >>>>> bank, val); } >>>>> >>>>> -static int inject_llc_srao(xc_interface *xc_handle, >>>>> - uint32_t cpu_nr, >>>>> - uint32_t domain, >>>>> - uint64_t gaddr) >>>>> +static int intel_inject_llc_srao(xc_interface *xc_handle, >>>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>>>> uint64_t gpfn, mfn, haddr; >>>>> int ret = 0; >>>>> @@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface >>>>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>>>> MCi_STATUS MSR\n"); >>>>> >>>>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>>>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>>>> MCE_SRAO_LLC_BANK, >>>>> MCi_MISC_SRAO_LLC_VAL); if ( ret ) err(xc_handle, >>>>> "Failed to inject MCi_MISC MSR\n"); @@ -407,17 +447,18 @@ static >>>>> int inject_llc_srao(xc_interface *xc_handle, ret >>>>> flush_msr_inj(xc_handle); if ( ret ) >>>>> err(xc_handle, "Failed to inject MSR\n"); >>>>> - ret = inject_mce(xc_handle, cpu_nr); >>>>> - if ( ret ) >>>>> - err(xc_handle, "Failed to inject MCE error\n"); + + >>>>> if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); >>>>> + if ( ret ) >>>>> + err(xc_handle, "Failed to inject MCE error\n"); + >>>>> } >>>>> >>>>> return 0; >>>>> } >>>>> >>>>> -static int inject_mem_srao(xc_interface *xc_handle, >>>>> - uint32_t cpu_nr, >>>>> - uint32_t domain, >>>>> - uint64_t gaddr) >>>>> +static int intel_inject_mem_srao(xc_interface *xc_handle, >>>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>>>> uint64_t gpfn, mfn, haddr; >>>>> int ret = 0; >>>>> @@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface >>>>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>>>> 
MCi_STATUS MSR\n"); >>>>> >>>>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>>>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>>>> MCE_SRAO_MEM_BANK, >>>>> MCi_MISC_SRAO_MEM_VAL); if ( ret ) err(xc_handle, >>>>> "Failed to inject MCi_MISC MSR\n"); @@ -448,17 +489,18 @@ static >>>>> int inject_mem_srao(xc_interface *xc_handle, ret >>>>> flush_msr_inj(xc_handle); if ( ret ) >>>>> err(xc_handle, "Failed to inject MSR\n"); >>>>> - ret = inject_mce(xc_handle, cpu_nr); >>>>> - if ( ret ) >>>>> - err(xc_handle, "Failed to inject MCE error\n"); + + >>>>> if (opt_exception) { + ret = inject_mce(xc_handle, cpu_nr); >>>>> + if ( ret ) >>>>> + err(xc_handle, "Failed to inject MCE error\n"); + >>>>> } >>>>> >>>>> return 0; >>>>> } >>>>> >>>>> -static int inject_llc_ucna(xc_interface *xc_handle, >>>>> - uint32_t cpu_nr, >>>>> - uint32_t domain, >>>>> - uint64_t gaddr) >>>>> +static int intel_inject_llc_ucna(xc_interface *xc_handle, >>>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) { >>>>> uint64_t gpfn, mfn, haddr; >>>>> int ret = 0; >>>>> @@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface >>>>> *xc_handle, if ( ret ) err(xc_handle, "Failed to inject >>>>> MCi_STATUS MSR\n"); >>>>> >>>>> - ret = inject_mci_misc(xc_handle, cpu_nr, >>>>> + ret = inject_mci_misc(xc_handle, cpu_nr, 0, >>>>> CMCI_UCNA_LLC_BANK, >>>>> MCi_MISC_UCNA_LLC_VAL); if ( ret ) err(xc_handle, >>>>> "Failed to inject MCi_MISC MSR\n"); @@ -489,13 +531,108 @@ static >>>>> int inject_llc_ucna(xc_interface *xc_handle, ret >>>>> flush_msr_inj(xc_handle); if ( ret ) >>>>> err(xc_handle, "Failed to inject MSR\n"); >>>>> - ret = inject_cmci(xc_handle, cpu_nr); >>>>> + ret = intel_inject_cmci(xc_handle); >>>>> if ( ret ) >>>>> err(xc_handle, "Failed to inject MCE error\n"); >>>>> >>>>> return 0; >>>>> } >>>>> >>>>> +static int amd_inject_mem(xc_interface *xc_handle, >>>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ >>>>> + uint64_t gpfn, mfn, haddr; >>>>> + int ret = 0; >>>>> + >>>>> + ret 
= inject_mcg_status(xc_handle, cpu_nr, >>>>> MCG_STATUS_MEM_VAL); + if ( ret ) + err(xc_handle, >>>>> "Failed to inject MCG_STATUS MSR\n"); + + ret >>>>> inject_mci_status(xc_handle, cpu_nr, + >>>>> MCE_MEM_BANK, MCi_STATUS_MEM_VAL); + if ( ret ) + >>>>> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret >>>>> inject_mci_misc(xc_handle, cpu_nr, 0, + >>>>> MCE_MEM_BANK, MCi_MISC_MEM_VAL); + if ( ret ) + >>>>> err(xc_handle, "Failed to inject MCi_MISC MSR\n"); + + gpfn >>>>> gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, >>>>> gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN >>>>> is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & >>>>> (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, >>>>> MCE_MEM_BANK, haddr); + if ( ret ) + err(xc_handle, >>>>> "Failed to inject MCi_ADDR MSR\n"); + + ret >>>>> flush_msr_inj(xc_handle); + if ( ret ) >>>>> + err(xc_handle, "Failed to inject MSR\n"); >>>>> + >>>>> + if (opt_exception) { >>>>> + ret = inject_mce(xc_handle, cpu_nr); >>>>> + if ( ret ) >>>>> + err(xc_handle, "Failed to inject MCE error\n"); + >>>>> } + + return 0; >>>>> +} >>>>> + >>>>> +static int amd_inject_l3(xc_interface *xc_handle, >>>>> + uint32_t cpu_nr, uint32_t domain, uint64_t gaddr) +{ >>>>> + uint64_t gpfn, mfn, haddr; >>>>> + int ret = 0; >>>>> + >>>>> + ret = inject_mcg_status(xc_handle, cpu_nr, >>>>> MCG_STATUS_L3_VAL); + if ( ret ) + err(xc_handle, >>>>> "Failed to inject MCG_STATUS MSR\n"); + + ret >>>>> inject_mci_status(xc_handle, cpu_nr, + >>>>> MCE_L3_BANK, MCi_STATUS_L3_VAL); + if ( ret ) + >>>>> err(xc_handle, "Failed to inject MCi_STATUS MSR\n"); + + ret >>>>> inject_mci_misc(xc_handle, cpu_nr, 0, + >>>>> MCE_L3_BANK, MC4_MISC0_VAL); + if ( ret ) + >>>>> err(xc_handle, "Failed to inject MC4_MISC0 MSR\n"); + + ret >>>>> inject_mci_misc(xc_handle, cpu_nr, 1, + >>>>> MCE_L3_BANK, MC4_MISC1_VAL); + if ( ret ) + >>>>> err(xc_handle, "Failed to inject MC4_MISC1 MSR\n"); + + ret >>>>> 
inject_mci_misc(xc_handle, cpu_nr, 2, + >>>>> MCE_L3_BANK, MC4_MISC2_L3_VAL); + if ( ret ) + >>>>> err(xc_handle, "Failed to inject MC4_MISC2 MSR\n"); + + gpfn >>>>> gaddr >> PAGE_SHIFT; + mfn = mca_gpfn_to_mfn(xc_handle, domain, >>>>> gpfn); + if (!mfn_valid(mfn)) + err(xc_handle, "The MFN >>>>> is not valid\n"); + haddr = (mfn << PAGE_SHIFT) | (gaddr & >>>>> (PAGE_SIZE - 1)); + ret = inject_mci_addr(xc_handle, cpu_nr, >>>>> MCE_L3_BANK, haddr); + if ( ret ) + err(xc_handle, >>>>> "Failed to inject MCi_ADDR MSR\n"); + + ret >>>>> flush_msr_inj(xc_handle); + if ( ret ) >>>>> + err(xc_handle, "Failed to inject MSR\n"); >>>>> + >>>>> + if (opt_exception) { >>>>> + ret = inject_mce(xc_handle, cpu_nr); >>>>> + if ( ret ) >>>>> + err(xc_handle, "Failed to inject MCE error\n"); + >>>>> } + + return 0; >>>>> +} >>>>> + >>>>> + >>>>> static long xs_get_dom_mem(int domid) >>>>> { >>>>> char path[128]; >>>>> @@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid) if >>>>> (!xs) return -1; >>>>> >>>>> - sprintf(path, "/local/domain/%d/memory/target", domid); >>>>> + snprintf(path, sizeof(path), >>>>> "/local/domain/%d/memory/target", domid); memstr >>>>> xs_read(xs, XBT_NULL, path, &plen); xs_daemon_close(xs); >>>>> >>>>> @@ -540,30 +677,109 @@ static void help(void) >>>>> " -D, --dump dump addr info without error >>>>> injection\n" " -c, --cpu=CPU_ID target CPU\n" >>>>> " -d, --domain=DomID target domain, the default is >>>>> Xen itself\n" + " -e raise MCE >>>>> exception\n" " -h, --help print this >>>>> page\n" " -p, --phyaddr physical address\n" >>>>> - " -t, --type=error error type\n" >>>>> - " 0 : MCE_SRAO_MEM\n" >>>>> - " 1 : MCE_SRAO_LLC\n" >>>>> - " 2 : CMCI_UCNA_LLC\n" >>>>> - "\n" >>>>> ); >>>>> + >>>>> + if (cpu_vendor == CPU_VENDOR_INTEL) { >>>>> + printf( >>>>> + " -t, --type=error error type\n" >>>>> + " 0x0 : SRAO MEM\n" >>>>> + " 0x1 : SRAO LLC\n" >>>>> + " 0x2 : CMCI UCNA LLC\n"); + >>>>> } + if (cpu_vendor == CPU_VENDOR_AMD) { >>>>> + printf( >>>>> + " -t, 
--type=error error type\n" >>>>> + " 0x0: DRAM error\n" >>>>> + " 0x1: L3 cache error\n"); + } >>>>> + printf("\n"); +} >>>>> + >>>>> +static void cpuid(const unsigned int *input, unsigned int >>>>> regs[4]) +{ + unsigned int count = (input[1] =>>>>> XEN_CPUID_INPUT_UNUSED) ? 0 : input[1]; +#ifdef __i386__ >>>>> + /* Use the stack to avoid reg constraint failures with some >>>>> gcc flags */ + asm ( + "push %%ebx; push %%edx\n\t" >>>>> + "cpuid\n\t" >>>>> + "mov %%ebx,4(%4)\n\t" >>>>> + "mov %%edx,12(%4)\n\t" >>>>> + "pop %%edx; pop %%ebx\n\t" >>>>> + : "=a" (regs[0]), "=c" (regs[2]) >>>>> + : "0" (input[0]), "1" (count), "S" (regs) >>>>> + : "memory" ); >>>>> +#else >>>>> + asm ( >>>>> + "cpuid" >>>>> + : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" >>>>> (regs[3]) + : "0" (input[0]), "2" (count) ); +#endif +} >>>>> + >>>>> +/* Get the manufacturer brand name of the host processor. */ >>>>> +static void cpuid_brand_get(char *str, size_t len) +{ >>>>> + unsigned int input[2] = { 0, 0 }; >>>>> + union { >>>>> + unsigned int regs[4]; >>>>> + struct { >>>>> + char eax[4]; >>>>> + char ebx[4]; >>>>> + char ecx[4]; >>>>> + char edx[4]; >>>>> + } str_regs; >>>>> + } cpu_branding; >>>>> + >>>>> + cpuid(input, cpu_branding.regs); >>>>> + >>>>> + snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c", >>>>> + cpu_branding.str_regs.ebx[0], >>>>> cpu_branding.str_regs.ebx[1], + >>>>> cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3], + >>>>> cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1], + >>>>> cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3], + >>>>> cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1], + >>>>> cpu_branding.str_regs.ecx[2], cpu_branding.str_regs.ecx[3]); } >>>>> >>>>> int main(int argc, char *argv[]) >>>>> { >>>>> - int type = MCE_SRAO_MEM; >>>>> + int type; >>>>> int c, opt_index; >>>>> uint32_t domid; >>>>> xc_interface *xc_handle; >>>>> - int cpu_nr; >>>>> - int64_t gaddr, gpfn, mfn, haddr, max_gpa; >>>>> + unsigned 
int cpu_nr; >>>>> + uint64_t gaddr, gpfn, mfn, haddr, max_gpa; >>>>> + char cpu_brand[13]; >>>>> >>>>> /* Default Value */ >>>>> domid = DOMID_XEN; >>>>> gaddr = 0x180020; >>>>> cpu_nr = 0; >>>>> >>>>> + cpu_vendor = CPU_VENDOR_UNKNOWN; >>>>> + cpuid_brand_get(cpu_brand, sizeof(cpu_brand)); >>>>> + if (strstr(cpu_brand, "AMD")) >>>>> + cpu_vendor = CPU_VENDOR_AMD; >>>>> + if (strstr(cpu_brand, "Intel")) >>>>> + cpu_vendor = CPU_VENDOR_INTEL; >>>>> + >>>>> + switch (cpu_vendor) { >>>>> + case CPU_VENDOR_AMD: >>>>> + type = AMD_MCE_MEM; >>>>> + break; >>>>> + case CPU_VENDOR_INTEL: >>>>> + type = INTEL_MCE_SRAO_MEM; >>>>> + break; >>>>> + case CPU_VENDOR_UNKNOWN: >>>>> + default: >>>>> + Lprintf("Unknown cpu vendor on this machine\n"); + >>>>> exit(EXIT_FAILURE); + } + >>>>> init_msr_inj(); >>>>> xc_handle = xc_interface_open(0, 0, 0); >>>>> if ( !xc_handle ) { >>>>> @@ -571,8 +787,8 @@ int main(int argc, char *argv[]) >>>>> exit(EXIT_FAILURE); } >>>>> >>>>> - while ( 1 ) { >>>>> - c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, >>>>> &opt_index); + for (;;) { + c = getopt_long(argc, argv, >>>>> "c:Dd:t:hp:r:e", opts, &opt_index); if ( c == -1 ) >>>>> break; switch ( c ) { >>>>> @@ -580,23 +796,26 @@ int main(int argc, char *argv[]) >>>>> dump=1; break; case ''c'': >>>>> - cpu_nr = strtol(optarg, &optarg, 10); >>>>> + cpu_nr = strtoul(optarg, &optarg, 0); >>>>> if ( strlen(optarg) != 0 ) >>>>> err(xc_handle, "Please input a digit parameter >>>>> for CPU\n"); break; case ''d'': >>>>> - domid = strtol(optarg, &optarg, 10); >>>>> + domid = strtoul(optarg, &optarg, 0); >>>>> if ( strlen(optarg) != 0 ) >>>>> err(xc_handle, "Please input a digit parameter >>>>> for domain\n"); break; >>>>> case ''p'': >>>>> - gaddr = strtol(optarg, &optarg, 0); >>>>> + gaddr = strtoul(optarg, &optarg, 0); >>>>> if ( strlen(optarg) != 0 ) >>>>> err(xc_handle, "Please input correct page >>>>> address\n"); break; >>>>> case ''t'': >>>>> type = strtol(optarg, NULL, 0); >>>>> break; >>>>> + 
case ''e'': >>>>> + opt_exception = 1; >>>>> + break; >>>>> case ''h'': >>>>> default: >>>>> help(); >>>>> @@ -627,19 +846,36 @@ int main(int argc, char *argv[]) goto >>>>> out; } >>>>> >>>>> - switch ( type ) >>>>> - { >>>>> - case MCE_SRAO_MEM: >>>>> - inject_mem_srao(xc_handle, cpu_nr, domid, gaddr); >>>>> - break; >>>>> - case MCE_SRAO_LLC: >>>>> - inject_llc_srao(xc_handle, cpu_nr, domid, gaddr); >>>>> - break; >>>>> - case CMCI_UCNA_LLC: >>>>> - inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr); + >>>>> switch ( cpu_vendor ) { + case CPU_VENDOR_INTEL: + >>>>> switch ( type ) { + case INTEL_MCE_SRAO_MEM: >>>>> + intel_inject_mem_srao(xc_handle, cpu_nr, domid, >>>>> gaddr); + break; + case INTEL_MCE_SRAO_LLC: >>>>> + intel_inject_llc_srao(xc_handle, cpu_nr, domid, >>>>> gaddr); + break; + case INTEL_CMCI_UCNA_LLC: >>>>> + intel_inject_llc_ucna(xc_handle, cpu_nr, domid, >>>>> gaddr); + break; + default: >>>>> + err(xc_handle, "Unsupported error type\n"); + >>>>> break; + } >>>>> break; >>>>> - default: >>>>> - err(xc_handle, "Unsupported error type\n"); + + case >>>>> CPU_VENDOR_AMD: + switch ( type ) { >>>>> + case AMD_MCE_MEM: >>>>> + amd_inject_mem(xc_handle, cpu_nr, domid, gaddr); + >>>>> break; + case AMD_MCE_L3: >>>>> + amd_inject_l3(xc_handle, cpu_nr, domid, gaddr); + >>>>> break; + default: + err(xc_handle, "Unsupported >>>>> error type\n"); + >>>>> break; + } >>>>> break; >>>>> }