Jiang, Yunhong
2010-Apr-16 10:55 UTC
[Xen-devel] [PATCH] Clean-up on MCA MSR virtualization and vMCE injection
Clean-up on MCA MSR virtualization and vMCE injection
Move all virtual MCE related work into a separate file.
It also tries to do some clean-up on the vMCE, including:
a) rename some functions, e.g. mce_init_msr/mce_rdmsr become
vmce_init_msr/vmce_rdmsr, to make them more straightforward,
b) make vmca_msrs a pointer in arch_domain,
to decrease arch_domain's size,
c) extract per-bank MCA MSR access into separate functions
(bank_mce_wrmsr/bank_mce_rdmsr) to make it a bit cleaner,
d) add a new file, xen/include/asm-x86/mce.h, for vMCE-related declarations.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/Makefile Fri Apr 16 18:55:03 2010 +0800
@@ -7,3 +7,4 @@ obj-y += mce_intel.o
obj-y += mce_intel.o
obj-y += mce_amd_quirks.o
obj-y += non-fatal.o
+obj-y += vmce.o
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Apr 16 18:55:03 2010 +0800
@@ -31,11 +31,11 @@ unsigned int nr_mce_banks;
unsigned int nr_mce_banks;
int mce_broadcast = 0;
-static uint64_t g_mcg_cap;
+uint64_t g_mcg_cap;
/* Real value in physical CTL MSR */
-static uint64_t h_mcg_ctl = 0UL;
-static uint64_t *h_mci_ctrl;
+uint64_t h_mcg_ctl = 0UL;
+uint64_t *h_mci_ctrl;
int firstbank;
static void intpose_init(void);
@@ -752,234 +752,6 @@ u64 mce_cap_init(void)
return value;
}
-/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
-void mce_init_msr(struct domain *d)
-{
- d->arch.vmca_msrs.mcg_status = 0x0;
- d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
- d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0;
- d->arch.vmca_msrs.nr_injection = 0;
- memset(d->arch.vmca_msrs.mci_ctl, ~0,
- sizeof(d->arch.vmca_msrs.mci_ctl));
- INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
- spin_lock_init(&d->arch.vmca_msrs.lock);
-}
-
-int mce_rdmsr(uint32_t msr, uint64_t *val)
-{
- struct domain *d = current->domain;
- int ret = 1;
- unsigned int bank;
- struct bank_entry *entry = NULL;
-
- *val = 0;
- spin_lock(&d->arch.vmca_msrs.lock);
-
- switch ( msr )
- {
- case MSR_IA32_MCG_STATUS:
- *val = d->arch.vmca_msrs.mcg_status;
- if (*val)
- mce_printk(MCE_VERBOSE,
- "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n",
*val);
- break;
- case MSR_IA32_MCG_CAP:
- *val = d->arch.vmca_msrs.mcg_cap;
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP
0x%"PRIx64"\n",
- *val);
- break;
- case MSR_IA32_MCG_CTL:
- /* Always 0 if no CTL support */
- *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl;
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL
0x%"PRIx64"\n",
- *val);
- break;
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
- bank = (msr - MSR_IA32_MC0_CTL) / 4;
- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
- {
- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n",
msr);
- ret = 0;
- break;
- }
- switch (msr & (MSR_IA32_MC0_CTL | 3))
- {
- case MSR_IA32_MC0_CTL:
- *val = d->arch.vmca_msrs.mci_ctl[bank] &
- (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL
0x%"PRIx64"\n",
- bank, *val);
- break;
- case MSR_IA32_MC0_STATUS:
- /* Only error bank is read. Non-error banks simply return. */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if (entry->bank == bank) {
- *val = entry->mci_status;
- mce_printk(MCE_VERBOSE,
- "MCE: rd MC%u_STATUS in vMCE# context "
- "value 0x%"PRIx64"\n", bank,
*val);
- }
- else
- entry = NULL;
- }
- break;
- case MSR_IA32_MC0_ADDR:
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_addr;
- mce_printk(MCE_VERBOSE,
- "MCE: rdmsr MC%u_ADDR in vMCE# context "
- "0x%"PRIx64"\n", bank, *val);
- }
- }
- break;
- case MSR_IA32_MC0_MISC:
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_misc;
- mce_printk(MCE_VERBOSE,
- "MCE: rd MC%u_MISC in vMCE# context "
- "0x%"PRIx64"\n", bank, *val);
- }
- }
- break;
- }
- break;
- default:
- switch ( boot_cpu_data.x86_vendor )
- {
- case X86_VENDOR_INTEL:
- ret = intel_mce_rdmsr(msr, val);
- break;
- default:
- ret = 0;
- break;
- }
- break;
- }
-
- spin_unlock(&d->arch.vmca_msrs.lock);
- return ret;
-}
-
-int mce_wrmsr(u32 msr, u64 val)
-{
- struct domain *d = current->domain;
- struct bank_entry *entry = NULL;
- unsigned int bank;
- int ret = 1;
-
- if ( !g_mcg_cap )
- return 0;
-
- spin_lock(&d->arch.vmca_msrs.lock);
-
- switch ( msr )
- {
- case MSR_IA32_MCG_CTL:
- d->arch.vmca_msrs.mcg_ctl = val;
- break;
- case MSR_IA32_MCG_STATUS:
- d->arch.vmca_msrs.mcg_status = val;
- mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS
%"PRIx64"\n", val);
- /* For HVM guest, this is the point for deleting vMCE injection node */
- if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0)
)
- {
- d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->mci_status & MCi_STATUS_VAL )
- mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have
"
- "been cleared before write MCG_STATUS
MSR\n");
-
- mce_printk(MCE_QUIET, "MCE: Delete HVM last injection
"
- "Node, nr_injection %u\n",
- d->arch.vmca_msrs.nr_injection);
- list_del(&entry->list);
- xfree(entry);
- }
- else
- mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
- " last injection Node, something Wrong!\n");
- }
- break;
- case MSR_IA32_MCG_CAP:
- mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
- ret = -1;
- break;
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
- bank = (msr - MSR_IA32_MC0_CTL) / 4;
- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
- {
- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n",
msr);
- ret = 0;
- break;
- }
- switch ( msr & (MSR_IA32_MC0_CTL | 3) )
- {
- case MSR_IA32_MC0_CTL:
- d->arch.vmca_msrs.mci_ctl[bank] = val;
- break;
- case MSR_IA32_MC0_STATUS:
- /* Give the first entry of the list, it corresponds to current
- * vMCE# injection. When vMCE# is finished processing by the
- * the guest, this node will be deleted.
- * Only error bank is written. Non-error banks simply return.
- */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- entry->mci_status = val;
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_STATUS %"PRIx64" in
vMCE#\n",
- bank, val);
- }
- else
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_STATUS %"PRIx64"\n",
bank, val);
- break;
- case MSR_IA32_MC0_ADDR:
- mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n",
bank);
- ret = -1;
- break;
- case MSR_IA32_MC0_MISC:
- mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n",
bank);
- ret = -1;
- break;
- }
- break;
- default:
- switch ( boot_cpu_data.x86_vendor )
- {
- case X86_VENDOR_INTEL:
- ret = intel_mce_wrmsr(msr, val);
- break;
- default:
- ret = 0;
- break;
- }
- break;
- }
-
- spin_unlock(&d->arch.vmca_msrs.lock);
- return ret;
-}
-
static void mcinfo_clear(struct mc_info *mi)
{
memset(mi, 0, sizeof(struct mc_info));
@@ -1238,11 +1010,11 @@ int mca_ctl_conflict(struct mcinfo_bank
return 1;
/* Will MCE happen in host if If host mcg_ctl is 0? */
- if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl )
+ if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
return 1;
bank_nr = bank->mc_bank;
- if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
+ if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
return 1;
return 0;
}
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Apr 16 18:55:03 2010 +0800
@@ -164,4 +164,32 @@ int x86_mcinfo_add(struct mc_info *mi, v
int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
void x86_mcinfo_dump(struct mc_info *mi);
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+ uint64_t gstatus);
+int inject_vmce(struct domain *d);
+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct
mcinfo_global *global);
+
+extern uint64_t g_mcg_cap;
+/* Real value in physical CTL MSR */
+extern uint64_t h_mcg_ctl;
+extern uint64_t *h_mci_ctrl;
+
+extern unsigned int nr_mce_banks;
+
+static inline int mce_vendor_bank_msr(uint32_t msr)
+{
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ (msr > MSR_IA32_MC0_CTL2 && msr < (MSR_IA32_MC0_CTL2 +
nr_mce_banks)) )
+ return 1;
+ return 0;
+}
+
+static inline int mce_bank_msr(uint32_t msr)
+{
+ if ( (msr > MSR_IA32_MC0_CTL2 &&
+ msr < (MSR_IA32_MC0_CTL + 4 * nr_mce_banks - 1)) ||
+ mce_vendor_bank_msr(msr) )
+ return 1;
+ return 0;
+}
#endif /* _MCE_H */
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Apr 16 18:55:03 2010 +0800
@@ -11,6 +11,7 @@
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/p2m.h>
+#include <asm/mce.h>
#include "mce.h"
#include "x86_mca.h"
@@ -199,126 +200,6 @@ intel_get_extended_msrs(struct mc_info *
return MCA_EXTINFO_GLOBAL;
}
-/* This node list records errors impacting a domain. when one
- * MCE# happens, one error bank impacts a domain. This error node
- * will be inserted to the tail of the per_dom data for vMCE# MSR
- * virtualization. When one vMCE# injection is finished processing
- * processed by guest, the corresponding node will be deleted.
- * This node list is for GUEST vMCE# MSRS virtualization.
- */
-static struct bank_entry* alloc_bank_entry(void) {
- struct bank_entry *entry;
-
- entry = xmalloc(struct bank_entry);
- if (!entry) {
- printk(KERN_ERR "MCE: malloc bank_entry failed\n");
- return NULL;
- }
- memset(entry, 0x0, sizeof(entry));
- INIT_LIST_HEAD(&entry->list);
- return entry;
-}
-
-/* Fill error bank info for #vMCE injection and GUEST vMCE#
- * MSR virtualization data
- * 1) Log down how many nr_injections of the impacted.
- * 2) Copy MCE# error bank to impacted DOM node list,
- for vMCE# MSRs virtualization
-*/
-
-static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
- uint64_t gstatus) {
- struct bank_entry *entry;
-
- /* This error bank impacts one domain, we need to fill domain related
- * data for vMCE MSRs virtualization and vMCE# injection */
- if (mc_bank->mc_domid != (uint16_t)~0) {
- /* For HVM guest, Only when first vMCE is consumed by HVM guest
successfully,
- * will we generete another node and inject another vMCE
- */
- if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection >
0) )
- {
- mce_printk(MCE_QUIET, "MCE: HVM guest has not handled
previous"
- " vMCE yet!\n");
- return -1;
- }
- entry = alloc_bank_entry();
- if (entry == NULL)
- return -1;
-
- entry->mci_status = mc_bank->mc_status;
- entry->mci_addr = mc_bank->mc_addr;
- entry->mci_misc = mc_bank->mc_misc;
- entry->bank = mc_bank->mc_bank;
-
- spin_lock(&d->arch.vmca_msrs.lock);
- /* New error Node, insert to the tail of the per_dom data */
- list_add_tail(&entry->list,
&d->arch.vmca_msrs.impact_header);
- /* Fill MSR global status */
- d->arch.vmca_msrs.mcg_status = gstatus;
- /* New node impact the domain, need another vMCE# injection*/
- d->arch.vmca_msrs.nr_injection++;
- spin_unlock(&d->arch.vmca_msrs.lock);
-
- mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
- "status %"PRIx64" addr %"PRIx64" domid
%d]\n ",
- mc_bank->mc_bank, mc_bank->mc_status,
mc_bank->mc_addr,
- mc_bank->mc_domid);
- }
- return 0;
-}
-
-static int inject_mce(struct domain *d)
-{
- int cpu = smp_processor_id();
- cpumask_t affinity;
-
- /* PV guest and HVM guest have different vMCE# injection
- * methods*/
-
- if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
- {
- if (d->is_hvm)
- {
- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM
%d\n",
- d->domain_id);
- vcpu_kick(d->vcpu[0]);
- }
- /* PV guest including DOM0 */
- else
- {
- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n",
- d->domain_id);
- if (guest_has_trap_callback
- (d, 0, TRAP_machine_check))
- {
- d->vcpu[0]->cpu_affinity_tmp =
- d->vcpu[0]->cpu_affinity;
- cpus_clear(affinity);
- cpu_set(cpu, affinity);
- mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old
%d\n", cpu,
- d->vcpu[0]->processor);
- vcpu_set_affinity(d->vcpu[0], &affinity);
- vcpu_kick(d->vcpu[0]);
- }
- else
- {
- mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE
handler\n");
- domain_crash(d);
- }
- }
- }
- else {
- /* new vMCE comes while first one has not been injected yet,
- * in this case, inject fail. [We can't lose this vMCE for
- * the mce node's consistency].
- */
- mce_printk(MCE_QUIET, "There''s a pending vMCE waiting to
be injected "
- " to this DOM%d!\n", d->domain_id);
- return -1;
- }
- return 0;
-}
static void intel_UCR_handler(struct mcinfo_bank *bank,
struct mcinfo_global *global,
@@ -377,7 +258,7 @@ static void intel_UCR_handler(struct mci
return;
}
/* We will inject vMCE to DOMU*/
- if ( inject_mce(d) < 0 )
+ if ( inject_vmce(d) < 0 )
{
mce_printk(MCE_QUIET, "inject vMCE to
DOM%d"
" failed\n",
d->domain_id);
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/vmce.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/vmce.c Fri Apr 16 18:55:03 2010 +0800
@@ -0,0 +1,451 @@
+/*
+ * vmce.c - virtual MCE support
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/delay.h>
+#include <xen/smp.h>
+#include <xen/mm.h>
+#include <asm/processor.h>
+#include <public/sysctl.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/p2m.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+int vmce_init_msr(struct domain *d)
+{
+ if ( dom_vmce(d) )
+ {
+ dprintk(XENLOG_G_WARNING, "Domain %d has inited vMCE\n",
d->domain_id);
+ return 0;
+ }
+
+ /* Allocate the vmca_msrs and mci_ctl togother */
+ dom_vmce(d) = xmalloc(struct domain_mca_msrs);
+ if ( !dom_vmce(d) )
+ return -ENOMEM;
+
+ dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks);
+ if ( !dom_vmce(d)->mci_ctl )
+ {
+ xfree(dom_vmce(d));
+ return -ENOMEM;
+ }
+ memset(d->arch.vmca_msrs->mci_ctl, ~0,
+ sizeof(d->arch.vmca_msrs->mci_ctl));
+
+ dom_vmce(d)->mcg_status = 0x0;
+ dom_vmce(d)->mcg_cap = g_mcg_cap;
+ dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0;
+ dom_vmce(d)->nr_injection = 0;
+
+ INIT_LIST_HEAD(&d->arch.vmca_msrs->impact_header);
+ spin_lock_init(&d->arch.vmca_msrs->lock);
+
+ return 0;
+}
+
+/*
+ * Caller should make sure msr is bank msr */
+static int bank_mce_rdmsr(struct domain *d, uint32_t msr, uint64_t *val)
+{
+ int bank, ret = 1;
+ struct domain_mca_msrs *vmce;
+ struct bank_entry *entry = NULL;
+
+ if (!d)
+ return -EINVAL;
+ vmce = dom_vmce(d);
+ ASSERT(vmce);
+
+ bank = (msr - MSR_IA32_MC0_CTL) / 4;
+ if (bank >= nr_mce_banks)
+ return -1;
+
+ switch (msr & (MSR_IA32_MC0_CTL | 3))
+ {
+ case MSR_IA32_MC0_CTL:
+ *val = vmce->mci_ctl[bank] &
+ (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL
0x%"PRIx64"\n",
+ bank, *val);
+ break;
+ case MSR_IA32_MC0_STATUS:
+ /* Only error bank is read. Non-error banks simply return. */
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if (entry->bank == bank) {
+ *val = entry->mci_status;
+ mce_printk(MCE_VERBOSE,
+ "MCE: rd MC%u_STATUS in vMCE# context "
+ "value 0x%"PRIx64"\n", bank, *val);
+ }
+ else
+ entry = NULL;
+ }
+ break;
+ case MSR_IA32_MC0_ADDR:
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == bank )
+ {
+ *val = entry->mci_addr;
+ mce_printk(MCE_VERBOSE,
+ "MCE: rdmsr MC%u_ADDR in vMCE# context "
+ "0x%"PRIx64"\n", bank, *val);
+ }
+ }
+ break;
+ case MSR_IA32_MC0_MISC:
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == bank )
+ {
+ *val = entry->mci_misc;
+ mce_printk(MCE_VERBOSE,
+ "MCE: rd MC%u_MISC in vMCE# context "
+ "0x%"PRIx64"\n", bank, *val);
+ }
+ }
+ break;
+ default:
+ switch ( boot_cpu_data.x86_vendor )
+ {
+ case X86_VENDOR_INTEL:
+ ret = intel_mce_rdmsr(msr, val);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * < 0: Unsupported and will #GP fault to guest
+ * = 0: Not handled, should be handled by other components
+ * > 0: Success
+ */
+int vmce_rdmsr(uint32_t msr, uint64_t *val)
+{
+ struct domain *d = current->domain;
+ struct domain_mca_msrs *vmce;
+ int ret = 1;
+
+ *val = 0;
+
+ vmce = dom_vmce(d);
+ if ( !vmce )
+ {
+ /* XXX more handle here */
+ return 0;
+ }
+
+ spin_lock(&d->arch.vmca_msrs->lock);
+
+ switch ( msr )
+ {
+ case MSR_IA32_MCG_STATUS:
+ *val = vmce->mcg_status;
+ if (*val)
+ mce_printk(MCE_VERBOSE,
+ "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n",
*val);
+ break;
+ case MSR_IA32_MCG_CAP:
+ *val = vmce->mcg_cap;
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP
0x%"PRIx64"\n",
+ *val);
+ break;
+ case MSR_IA32_MCG_CTL:
+ /* Always 0 if no CTL support */
+ *val = vmce->mcg_ctl & h_mcg_ctl;
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL
0x%"PRIx64"\n",
+ *val);
+ break;
+ default:
+ if ( mce_bank_msr(msr) )
+ ret = bank_mce_rdmsr(d, msr, val);
+ else
+ ret = 0;
+ break;
+ }
+
+ spin_unlock(&d->arch.vmca_msrs->lock);
+ return ret;
+}
+
+int bank_mce_wrmsr(struct domain *d, u32 msr, u64 val)
+{
+ int bank, ret = 1;
+ struct domain_mca_msrs *vmce;
+ struct bank_entry *entry = NULL;
+
+ if (!d)
+ return -EINVAL;
+ vmce = dom_vmce(d);
+ ASSERT(vmce && vmce->mci_ctl);
+
+ bank = (msr - MSR_IA32_MC0_CTL) / 4;
+ if (bank >= nr_mce_banks)
+ return -EINVAL;
+
+ switch ( msr & (MSR_IA32_MC0_CTL | 3) )
+ {
+ case MSR_IA32_MC0_CTL:
+ vmce->mci_ctl[bank] = val;
+ break;
+ case MSR_IA32_MC0_STATUS:
+ /* Give the first entry of the list, it corresponds to current
+ * vMCE# injection. When vMCE# is finished processing by the
+ * the guest, this node will be deleted.
+ * Only error bank is written. Non-error banks simply return.
+ */
+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
+ {
+ entry = list_entry(d->arch.vmca_msrs->impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == bank )
+ entry->mci_status = val;
+ mce_printk(MCE_VERBOSE,
+ "MCE: wr MC%u_STATUS %"PRIx64" in
vMCE#\n",
+ bank, val);
+ }
+ else
+ mce_printk(MCE_VERBOSE,
+ "MCE: wr MC%u_STATUS %"PRIx64"\n",
bank, val);
+ break;
+ case MSR_IA32_MC0_ADDR:
+ mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n",
bank);
+ ret = -1;
+ break;
+ case MSR_IA32_MC0_MISC:
+ mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n",
bank);
+ ret = -1;
+ break;
+ default:
+ switch ( boot_cpu_data.x86_vendor )
+ {
+ case X86_VENDOR_INTEL:
+ ret = intel_mce_wrmsr(msr, val);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * < 0: Unsupported and will #GP fault to guest
+ * = 0: Not handled, should be handled by other components
+ * > 0: Success
+ */
+int vmce_wrmsr(u32 msr, u64 val)
+{
+ struct domain *d = current->domain;
+ struct bank_entry *entry = NULL;
+ struct domain_mca_msrs *vmce;
+ int ret = 1;
+
+ if ( !g_mcg_cap )
+ return 0;
+
+ vmce = dom_vmce(d);
+ spin_lock(&vmce->lock);
+
+ switch ( msr )
+ {
+ case MSR_IA32_MCG_CTL:
+ vmce->mcg_ctl = val;
+ break;
+ case MSR_IA32_MCG_STATUS:
+ vmce->mcg_status = val;
+ mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS
%"PRIx64"\n", val);
+ /* For HVM guest, this is the point for deleting vMCE injection node */
+ if ( d->is_hvm && (vmce->nr_injection > 0) )
+ {
+ vmce->nr_injection--; /* Should be 0 */
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if ( entry->mci_status & MCi_STATUS_VAL )
+ mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have
"
+ "been cleared before write MCG_STATUS
MSR\n");
+
+ mce_printk(MCE_QUIET, "MCE: Delete HVM last injection
"
+ "Node, nr_injection %u\n",
+ vmce->nr_injection);
+ list_del(&entry->list);
+ xfree(entry);
+ }
+ else
+ mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
+ " last injection Node, something Wrong!\n");
+ }
+ break;
+ case MSR_IA32_MCG_CAP:
+ mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
+ ret = -1;
+ break;
+ default:
+ if ( mce_bank_msr(msr) )
+ ret = bank_mce_wrmsr(d, msr, val);
+ else
+ ret = 0;
+ break;
+ }
+
+ spin_unlock(&vmce->lock);
+ return ret;
+}
+
+int inject_vmce(struct domain *d)
+{
+ int cpu = smp_processor_id();
+ cpumask_t affinity;
+
+ /* PV guest and HVM guest have different vMCE# injection
+ * methods*/
+ if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
+ {
+ if (d->is_hvm)
+ {
+ mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM
%d\n",
+ d->domain_id);
+ vcpu_kick(d->vcpu[0]);
+ }
+ /* PV guest including DOM0 */
+ else
+ {
+ mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n",
+ d->domain_id);
+ if (guest_has_trap_callback
+ (d, 0, TRAP_machine_check))
+ {
+ d->vcpu[0]->cpu_affinity_tmp =
+ d->vcpu[0]->cpu_affinity;
+ cpus_clear(affinity);
+ cpu_set(cpu, affinity);
+ mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old
%d\n", cpu,
+ d->vcpu[0]->processor);
+ vcpu_set_affinity(d->vcpu[0], &affinity);
+ vcpu_kick(d->vcpu[0]);
+ }
+ else
+ {
+ mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE
handler\n");
+ domain_crash(d);
+ }
+ }
+ }
+ else {
+ /* new vMCE comes while first one has not been injected yet,
+ * in this case, inject fail. [We can't lose this vMCE for
+ * the mce node's consistency].
+ */
+ mce_printk(MCE_QUIET, "There''s a pending vMCE waiting to
be injected "
+ " to this DOM%d!\n", d->domain_id);
+ return -1;
+ }
+ return 0;
+}
+
+/* This node list records errors impacting a domain. when one
+ * MCE# happens, one error bank impacts a domain. This error node
+ * will be inserted to the tail of the per_dom data for vMCE# MSR
+ * virtualization. When one vMCE# injection is finished processing
+ * processed by guest, the corresponding node will be deleted.
+ * This node list is for GUEST vMCE# MSRS virtualization.
+ */
+static struct bank_entry* alloc_bank_entry(void) {
+ struct bank_entry *entry;
+
+ entry = xmalloc(struct bank_entry);
+ if (!entry) {
+ printk(KERN_ERR "MCE: malloc bank_entry failed\n");
+ return NULL;
+ }
+ memset(entry, 0x0, sizeof(entry));
+ INIT_LIST_HEAD(&entry->list);
+ return entry;
+}
+
+/* Fill error bank info for #vMCE injection and GUEST vMCE#
+ * MSR virtualization data
+ * 1) Log down how many nr_injections of the impacted.
+ * 2) Copy MCE# error bank to impacted DOM node list,
+ for vMCE# MSRs virtualization
+*/
+
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+ uint64_t gstatus) {
+ struct bank_entry *entry;
+
+ /* This error bank impacts one domain, we need to fill domain related
+ * data for vMCE MSRs virtualization and vMCE# injection */
+ if (mc_bank->mc_domid != (uint16_t)~0) {
+ /* For HVM guest, Only when first vMCE is consumed by HVM guest
successfully,
+ * will we generete another node and inject another vMCE
+ */
+ if ( (d->is_hvm) && (d->arch.vmca_msrs->nr_injection
> 0) )
+ {
+ mce_printk(MCE_QUIET, "MCE: HVM guest has not handled
previous"
+ " vMCE yet!\n");
+ return -1;
+ }
+ entry = alloc_bank_entry();
+ if (entry == NULL)
+ return -1;
+
+ entry->mci_status = mc_bank->mc_status;
+ entry->mci_addr = mc_bank->mc_addr;
+ entry->mci_misc = mc_bank->mc_misc;
+ entry->bank = mc_bank->mc_bank;
+
+ spin_lock(&d->arch.vmca_msrs->lock);
+ /* New error Node, insert to the tail of the per_dom data */
+ list_add_tail(&entry->list,
&d->arch.vmca_msrs->impact_header);
+ /* Fill MSR global status */
+ d->arch.vmca_msrs->mcg_status = gstatus;
+ /* New node impact the domain, need another vMCE# injection*/
+ d->arch.vmca_msrs->nr_injection++;
+ spin_unlock(&d->arch.vmca_msrs->lock);
+
+ mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
+ "status %"PRIx64" addr %"PRIx64" domid
%d]\n ",
+ mc_bank->mc_bank, mc_bank->mc_status,
mc_bank->mc_addr,
+ mc_bank->mc_domid);
+ }
+ return 0;
+}
+
+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct
mcinfo_global *global)
+{
+ int ret;
+
+ ret = fill_vmsr_data(bank, d, global->mc_gstatus);
+ if (ret < 0)
+ return ret;
+
+ return inject_vmce(d);
+}
+
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/domain.c Fri Apr 16 18:55:03 2010 +0800
@@ -49,6 +49,7 @@
#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/nmi.h>
+#include <asm/mce.h>
#include <xen/numa.h>
#include <xen/iommu.h>
#ifdef CONFIG_COMPAT
@@ -501,7 +502,7 @@ int arch_domain_create(struct domain *d,
goto fail;
/* For Guest vMCE MSRs virtualization */
- mce_init_msr(d);
+ vmce_init_msr(d);
}
if ( is_hvm_domain(d) )
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c Fri Apr 16 18:55:03 2010 +0800
@@ -47,6 +47,7 @@
#include <asm/traps.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
+#include <asm/mce.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
@@ -2061,7 +2062,7 @@ int hvm_msr_read_intercept(struct cpu_us
break;
default:
- ret = mce_rdmsr(ecx, &msr_content);
+ ret = vmce_rdmsr(ecx, &msr_content);
if ( ret < 0 )
goto gp_fault;
else if ( ret )
@@ -2160,7 +2161,7 @@ int hvm_msr_write_intercept(struct cpu_u
break;
default:
- ret = mce_wrmsr(ecx, msr_content);
+ ret = vmce_wrmsr(ecx, msr_content);
if ( ret < 0 )
goto gp_fault;
else if ( ret )
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/traps.c Fri Apr 16 18:55:03 2010 +0800
@@ -65,6 +65,7 @@
#include <asm/traps.h>
#include <asm/hvm/vpt.h>
#include <asm/hypercall.h>
+#include <asm/mce.h>
#include <public/arch-x86/cpuid.h>
/*
@@ -2295,7 +2296,7 @@ static int emulate_privileged_op(struct
if ( wrmsr_hypervisor_regs(regs->ecx, val) )
break;
- rc = mce_wrmsr(regs->ecx, val);
+ rc = vmce_wrmsr(regs->ecx, val);
if ( rc < 0 )
goto fail;
if ( rc )
@@ -2388,7 +2389,7 @@ static int emulate_privileged_op(struct
break;
}
- rc = mce_rdmsr(regs->ecx, &val);
+ rc = vmce_rdmsr(regs->ecx, &val);
if ( rc < 0 )
goto fail;
if ( rc )
@@ -2947,19 +2948,19 @@ void async_exception_cleanup(struct vcpu
{
struct domain *d = curr->domain;
- if ( !d->arch.vmca_msrs.nr_injection )
+ if ( !d->arch.vmca_msrs->nr_injection )
{
printk(XENLOG_WARNING "MCE: ret from vMCE#, "
"no injection node\n");
goto end;
}
- d->arch.vmca_msrs.nr_injection--;
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+ d->arch.vmca_msrs->nr_injection--;
+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
{
struct bank_entry *entry;
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ entry = list_entry(d->arch.vmca_msrs->impact_header.next,
struct bank_entry, list);
gdprintk(XENLOG_DEBUG, "MCE: delete last injection
node\n");
list_del(&entry->list);
@@ -2968,7 +2969,7 @@ void async_exception_cleanup(struct vcpu
printk(XENLOG_ERR "MCE: didn''t found last
injection node\n");
/* further injection */
- if ( d->arch.vmca_msrs.nr_injection > 0 &&
+ if ( d->arch.vmca_msrs->nr_injection > 0 &&
guest_has_trap_callback(d, 0, TRAP_machine_check) &&
!test_and_set_bool(curr->mce_pending) )
{
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/common/domain.c
--- a/xen/common/domain.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/common/domain.c Fri Apr 16 18:55:03 2010 +0800
@@ -616,6 +616,8 @@ static void complete_domain_destroy(stru
xfree(d->pirq_mask);
xfree(d->pirq_to_evtchn);
+ xfree(dom_vmce(d)->mci_ctl);
+ xfree(dom_vmce(d));
xsm_free_security_domain(d);
free_domain_struct(d);
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/include/asm-x86/domain.h Fri Apr 16 18:55:03 2010 +0800
@@ -6,6 +6,7 @@
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>
+#include <asm/mce.h>
#include <public/vcpu.h>
#define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo)
@@ -214,32 +215,6 @@ typedef xen_domctl_cpuid_t cpuid_input_t
typedef xen_domctl_cpuid_t cpuid_input_t;
struct p2m_domain;
-
-/* Define for GUEST MCA handling */
-#define MAX_NR_BANKS 30
-
-/* This entry is for recording bank nodes for the impacted domain,
- * put into impact_header list. */
-struct bank_entry {
- struct list_head list;
- uint16_t bank;
- uint64_t mci_status;
- uint64_t mci_addr;
- uint64_t mci_misc;
-};
-
-struct domain_mca_msrs
-{
- /* Guest should not change below values after DOM boot up */
- uint64_t mcg_cap;
- uint64_t mcg_ctl;
- uint64_t mcg_status;
- uint64_t mci_ctl[MAX_NR_BANKS];
- uint16_t nr_injection;
- struct list_head impact_header;
- spinlock_t lock;
-};
-
struct time_scale {
int shift;
u32 mul_frac;
@@ -311,7 +286,7 @@ struct arch_domain
cpuid_input_t cpuids[MAX_CPUID_INPUT];
/* For Guest vMCA handling */
- struct domain_mca_msrs vmca_msrs;
+ struct domain_mca_msrs *vmca_msrs;
/* TSC management (emulation, pv, scaling, stats) */
int tsc_mode; /* see include/asm-x86/time.h */
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/mce.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/mce.h Fri Apr 16 18:55:03 2010 +0800
@@ -0,0 +1,36 @@
+#include <xen/types.h>
+#include <public/arch-x86/xen-mca.h>
+#ifndef _XEN_X86_MCE_H
+#define _XEN_X86_MCE_H
+/* Define for GUEST MCA handling */
+#define MAX_NR_BANKS 30
+
+/* This entry is for recording bank nodes for the impacted domain,
+ * put into impact_header list. */
+struct bank_entry {
+ struct list_head list;
+ uint16_t bank;
+ uint64_t mci_status;
+ uint64_t mci_addr;
+ uint64_t mci_misc;
+};
+
+struct domain_mca_msrs
+{
+ /* Guest should not change below values after DOM boot up */
+ uint64_t mcg_cap;
+ uint64_t mcg_ctl;
+ uint64_t mcg_status;
+ uint64_t *mci_ctl;
+ uint16_t nr_injection;
+ struct list_head impact_header;
+ spinlock_t lock;
+};
+
+#define dom_vmce(x) ((x)->arch.vmca_msrs)
+
+/* Guest vMCE MSRs virtualization */
+extern int vmce_init_msr(struct domain *d);
+extern int vmce_wrmsr(uint32_t msr, uint64_t val);
+extern int vmce_rdmsr(uint32_t msr, uint64_t *val);
+#endif
diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/traps.h
--- a/xen/include/asm-x86/traps.h Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/include/asm-x86/traps.h Fri Apr 16 18:55:03 2010 +0800
@@ -49,9 +49,4 @@ extern int send_guest_trap(struct domain
extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
unsigned int trap_nr);
-/* Guest vMCE MSRs virtualization */
-extern void mce_init_msr(struct domain *d);
-extern int mce_wrmsr(uint32_t msr, uint64_t val);
-extern int mce_rdmsr(uint32_t msr, uint64_t *val);
-
#endif /* ASM_TRAP_H */
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jiang, Yunhong
2010-Apr-16 11:02 UTC
RE: [Xen-devel] [PATCH] Clean-up on MCA MSR virtualization and vMCE injection
Sorry forgot the attachment. --jyh>-----Original Message----- >From: xen-devel-bounces@lists.xensource.com >[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Jiang, Yunhong >Sent: Friday, April 16, 2010 6:56 PM >To: Keir Fraser; Frank.Vanderlinden@Sun.COM; Christoph Egger >Cc: xen-devel@lists.xensource.com >Subject: [Xen-devel] [PATCH] Clean-up on MCA MSR virtualization and vMCE injection > >Clean-up on MCA MSR virtualization and vMCE injection > >Remove all virtual MCE related work into a seperated file. >It also try to do some clean-up on the vMCE, including: >a) renmae some function name like mce_init_msr/mce_rdmsr to be > vmce_init_msr/vmce_rdmsr to make it more straightforward, >b) make the vmca_msrs be a pointer in arch_domain, > to decrease arch_domain''s size >c) extract per-bank MCA MSR access to be seperated function > (bank_mce_wrmsr/bank_mce_rdmsr) to make it be a bit cleaner. >d) A new file xen/include/asm-x86/mce.h is added for vmce related header. > >Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/Makefile >--- a/xen/arch/x86/cpu/mcheck/Makefile Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/Makefile Fri Apr 16 18:55:03 2010 +0800 >@@ -7,3 +7,4 @@ obj-y += mce_intel.o > obj-y += mce_intel.o > obj-y += mce_amd_quirks.o > obj-y += non-fatal.o >+obj-y += vmce.o >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce.c >--- a/xen/arch/x86/cpu/mcheck/mce.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Apr 16 18:55:03 2010 +0800 >@@ -31,11 +31,11 @@ unsigned int nr_mce_banks; > unsigned int nr_mce_banks; > > int mce_broadcast = 0; >-static uint64_t g_mcg_cap; >+uint64_t g_mcg_cap; > > /* Real value in physical CTL MSR */ >-static uint64_t h_mcg_ctl = 0UL; >-static uint64_t *h_mci_ctrl; >+uint64_t h_mcg_ctl = 0UL; >+uint64_t *h_mci_ctrl; > int firstbank; > > static void intpose_init(void); >@@ -752,234 +752,6 @@ u64 
mce_cap_init(void) > return value; > } > >-/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */ >-void mce_init_msr(struct domain *d) >-{ >- d->arch.vmca_msrs.mcg_status = 0x0; >- d->arch.vmca_msrs.mcg_cap = g_mcg_cap; >- d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0; >- d->arch.vmca_msrs.nr_injection = 0; >- memset(d->arch.vmca_msrs.mci_ctl, ~0, >- sizeof(d->arch.vmca_msrs.mci_ctl)); >- INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header); >- spin_lock_init(&d->arch.vmca_msrs.lock); >-} >- >-int mce_rdmsr(uint32_t msr, uint64_t *val) >-{ >- struct domain *d = current->domain; >- int ret = 1; >- unsigned int bank; >- struct bank_entry *entry = NULL; >- >- *val = 0; >- spin_lock(&d->arch.vmca_msrs.lock); >- >- switch ( msr ) >- { >- case MSR_IA32_MCG_STATUS: >- *val = d->arch.vmca_msrs.mcg_status; >- if (*val) >- mce_printk(MCE_VERBOSE, >- "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val); >- break; >- case MSR_IA32_MCG_CAP: >- *val = d->arch.vmca_msrs.mcg_cap; >- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", >- *val); >- break; >- case MSR_IA32_MCG_CTL: >- /* Always 0 if no CTL support */ >- *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; >- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", >- *val); >- break; >- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: >- bank = (msr - MSR_IA32_MC0_CTL) / 4; >- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) ) >- { >- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr); >- ret = 0; >- break; >- } >- switch (msr & (MSR_IA32_MC0_CTL | 3)) >- { >- case MSR_IA32_MC0_CTL: >- *val = d->arch.vmca_msrs.mci_ctl[bank] & >- (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); >- mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL >0x%"PRIx64"\n", >- bank, *val); >- break; >- case MSR_IA32_MC0_STATUS: >- /* Only error bank is read. Non-error banks simply return. 
*/ >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if (entry->bank == bank) { >- *val = entry->mci_status; >- mce_printk(MCE_VERBOSE, >- "MCE: rd MC%u_STATUS in vMCE# context " >- "value 0x%"PRIx64"\n", bank, *val); >- } >- else >- entry = NULL; >- } >- break; >- case MSR_IA32_MC0_ADDR: >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->bank == bank ) >- { >- *val = entry->mci_addr; >- mce_printk(MCE_VERBOSE, >- "MCE: rdmsr MC%u_ADDR in vMCE# context " >- "0x%"PRIx64"\n", bank, *val); >- } >- } >- break; >- case MSR_IA32_MC0_MISC: >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->bank == bank ) >- { >- *val = entry->mci_misc; >- mce_printk(MCE_VERBOSE, >- "MCE: rd MC%u_MISC in vMCE# context " >- "0x%"PRIx64"\n", bank, *val); >- } >- } >- break; >- } >- break; >- default: >- switch ( boot_cpu_data.x86_vendor ) >- { >- case X86_VENDOR_INTEL: >- ret = intel_mce_rdmsr(msr, val); >- break; >- default: >- ret = 0; >- break; >- } >- break; >- } >- >- spin_unlock(&d->arch.vmca_msrs.lock); >- return ret; >-} >- >-int mce_wrmsr(u32 msr, u64 val) >-{ >- struct domain *d = current->domain; >- struct bank_entry *entry = NULL; >- unsigned int bank; >- int ret = 1; >- >- if ( !g_mcg_cap ) >- return 0; >- >- spin_lock(&d->arch.vmca_msrs.lock); >- >- switch ( msr ) >- { >- case MSR_IA32_MCG_CTL: >- d->arch.vmca_msrs.mcg_ctl = val; >- break; >- case MSR_IA32_MCG_STATUS: >- d->arch.vmca_msrs.mcg_status = val; >- mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", >val); >- /* For HVM guest, this is the point for deleting vMCE injection node */ >- if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) ) >- { >- d->arch.vmca_msrs.nr_injection--; /* 
Should be 0 */ >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->mci_status & MCi_STATUS_VAL ) >- mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should >have " >- "been cleared before write MCG_STATUS >MSR\n"); >- >- mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " >- "Node, nr_injection %u\n", >- d->arch.vmca_msrs.nr_injection); >- list_del(&entry->list); >- xfree(entry); >- } >- else >- mce_printk(MCE_QUIET, "MCE: Not found HVM guest" >- " last injection Node, something Wrong!\n"); >- } >- break; >- case MSR_IA32_MCG_CAP: >- mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); >- ret = -1; >- break; >- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: >- bank = (msr - MSR_IA32_MC0_CTL) / 4; >- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) ) >- { >- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr); >- ret = 0; >- break; >- } >- switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >- { >- case MSR_IA32_MC0_CTL: >- d->arch.vmca_msrs.mci_ctl[bank] = val; >- break; >- case MSR_IA32_MC0_STATUS: >- /* Give the first entry of the list, it corresponds to current >- * vMCE# injection. When vMCE# is finished processing by the >- * the guest, this node will be deleted. >- * Only error bank is written. Non-error banks simply return. 
>- */ >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->bank == bank ) >- entry->mci_status = val; >- mce_printk(MCE_VERBOSE, >- "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", >- bank, val); >- } >- else >- mce_printk(MCE_VERBOSE, >- "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); >- break; >- case MSR_IA32_MC0_ADDR: >- mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank); >- ret = -1; >- break; >- case MSR_IA32_MC0_MISC: >- mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); >- ret = -1; >- break; >- } >- break; >- default: >- switch ( boot_cpu_data.x86_vendor ) >- { >- case X86_VENDOR_INTEL: >- ret = intel_mce_wrmsr(msr, val); >- break; >- default: >- ret = 0; >- break; >- } >- break; >- } >- >- spin_unlock(&d->arch.vmca_msrs.lock); >- return ret; >-} >- > static void mcinfo_clear(struct mc_info *mi) > { > memset(mi, 0, sizeof(struct mc_info)); >@@ -1238,11 +1010,11 @@ int mca_ctl_conflict(struct mcinfo_bank > return 1; > > /* Will MCE happen in host if If host mcg_ctl is 0? 
*/ >- if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) >+ if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) > return 1; > > bank_nr = bank->mc_bank; >- if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) >+ if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > return 1; > return 0; > } >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce.h >--- a/xen/arch/x86/cpu/mcheck/mce.h Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Apr 16 18:55:03 2010 +0800 >@@ -164,4 +164,32 @@ int x86_mcinfo_add(struct mc_info *mi, v > int x86_mcinfo_add(struct mc_info *mi, void *mcinfo); > void x86_mcinfo_dump(struct mc_info *mi); > >+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, >+ uint64_t gstatus); >+int inject_vmce(struct domain *d); >+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct >mcinfo_global *global); >+ >+extern uint64_t g_mcg_cap; >+/* Real value in physical CTL MSR */ >+extern uint64_t h_mcg_ctl; >+extern uint64_t *h_mci_ctrl; >+ >+extern unsigned int nr_mce_banks; >+ >+static inline int mce_vendor_bank_msr(uint32_t msr) >+{ >+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && >+ (msr > MSR_IA32_MC0_CTL2 && msr < (MSR_IA32_MC0_CTL2 + >nr_mce_banks)) ) >+ return 1; >+ return 0; >+} >+ >+static inline int mce_bank_msr(uint32_t msr) >+{ >+ if ( (msr > MSR_IA32_MC0_CTL2 && >+ msr < (MSR_IA32_MC0_CTL + 4 * nr_mce_banks - 1)) || >+ mce_vendor_bank_msr(msr) ) >+ return 1; >+ return 0; >+} > #endif /* _MCE_H */ >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce_intel.c >--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Apr 16 18:55:03 2010 >+0800 >@@ -11,6 +11,7 @@ > #include <asm/system.h> > #include <asm/msr.h> > #include <asm/p2m.h> >+#include <asm/mce.h> > #include "mce.h" > #include "x86_mca.h" > >@@ -199,126 +200,6 @@ intel_get_extended_msrs(struct mc_info * > return MCA_EXTINFO_GLOBAL; > 
} > >-/* This node list records errors impacting a domain. when one >- * MCE# happens, one error bank impacts a domain. This error node >- * will be inserted to the tail of the per_dom data for vMCE# MSR >- * virtualization. When one vMCE# injection is finished processing >- * processed by guest, the corresponding node will be deleted. >- * This node list is for GUEST vMCE# MSRS virtualization. >- */ >-static struct bank_entry* alloc_bank_entry(void) { >- struct bank_entry *entry; >- >- entry = xmalloc(struct bank_entry); >- if (!entry) { >- printk(KERN_ERR "MCE: malloc bank_entry failed\n"); >- return NULL; >- } >- memset(entry, 0x0, sizeof(entry)); >- INIT_LIST_HEAD(&entry->list); >- return entry; >-} >- >-/* Fill error bank info for #vMCE injection and GUEST vMCE# >- * MSR virtualization data >- * 1) Log down how many nr_injections of the impacted. >- * 2) Copy MCE# error bank to impacted DOM node list, >- for vMCE# MSRs virtualization >-*/ >- >-static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, >- uint64_t gstatus) { >- struct bank_entry *entry; >- >- /* This error bank impacts one domain, we need to fill domain related >- * data for vMCE MSRs virtualization and vMCE# injection */ >- if (mc_bank->mc_domid != (uint16_t)~0) { >- /* For HVM guest, Only when first vMCE is consumed by HVM guest >successfully, >- * will we generete another node and inject another vMCE >- */ >- if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) ) >- { >- mce_printk(MCE_QUIET, "MCE: HVM guest has not handled >previous" >- " vMCE yet!\n"); >- return -1; >- } >- entry = alloc_bank_entry(); >- if (entry == NULL) >- return -1; >- >- entry->mci_status = mc_bank->mc_status; >- entry->mci_addr = mc_bank->mc_addr; >- entry->mci_misc = mc_bank->mc_misc; >- entry->bank = mc_bank->mc_bank; >- >- spin_lock(&d->arch.vmca_msrs.lock); >- /* New error Node, insert to the tail of the per_dom data */ >- list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header); >- /* Fill 
MSR global status */ >- d->arch.vmca_msrs.mcg_status = gstatus; >- /* New node impact the domain, need another vMCE# injection*/ >- d->arch.vmca_msrs.nr_injection++; >- spin_unlock(&d->arch.vmca_msrs.lock); >- >- mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " >- "status %"PRIx64" addr %"PRIx64" domid %d]\n ", >- mc_bank->mc_bank, mc_bank->mc_status, >mc_bank->mc_addr, >- mc_bank->mc_domid); >- } >- return 0; >-} >- >-static int inject_mce(struct domain *d) >-{ >- int cpu = smp_processor_id(); >- cpumask_t affinity; >- >- /* PV guest and HVM guest have different vMCE# injection >- * methods*/ >- >- if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) >- { >- if (d->is_hvm) >- { >- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n", >- d->domain_id); >- vcpu_kick(d->vcpu[0]); >- } >- /* PV guest including DOM0 */ >- else >- { >- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", >- d->domain_id); >- if (guest_has_trap_callback >- (d, 0, TRAP_machine_check)) >- { >- d->vcpu[0]->cpu_affinity_tmp >- d->vcpu[0]->cpu_affinity; >- cpus_clear(affinity); >- cpu_set(cpu, affinity); >- mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, >old %d\n", cpu, >- d->vcpu[0]->processor); >- vcpu_set_affinity(d->vcpu[0], &affinity); >- vcpu_kick(d->vcpu[0]); >- } >- else >- { >- mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE >handler\n"); >- domain_crash(d); >- } >- } >- } >- else { >- /* new vMCE comes while first one has not been injected yet, >- * in this case, inject fail. [We can''t lose this vMCE for >- * the mce node''s consistency]. 
>- */ >- mce_printk(MCE_QUIET, "There''s a pending vMCE waiting to be injected >" >- " to this DOM%d!\n", d->domain_id); >- return -1; >- } >- return 0; >-} > > static void intel_UCR_handler(struct mcinfo_bank *bank, > struct mcinfo_global *global, >@@ -377,7 +258,7 @@ static void intel_UCR_handler(struct mci > return; > } > /* We will inject vMCE to DOMU*/ >- if ( inject_mce(d) < 0 ) >+ if ( inject_vmce(d) < 0 ) > { > mce_printk(MCE_QUIET, "inject vMCE to >DOM%d" > " failed\n", d->domain_id); >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/vmce.c >--- /dev/null Thu Jan 01 00:00:00 1970 +0000 >+++ b/xen/arch/x86/cpu/mcheck/vmce.c Fri Apr 16 18:55:03 2010 +0800 >@@ -0,0 +1,451 @@ >+/* >+ * vmce.c - virtual MCE support >+ */ >+ >+#include <xen/init.h> >+#include <xen/types.h> >+#include <xen/irq.h> >+#include <xen/event.h> >+#include <xen/kernel.h> >+#include <xen/delay.h> >+#include <xen/smp.h> >+#include <xen/mm.h> >+#include <asm/processor.h> >+#include <public/sysctl.h> >+#include <asm/system.h> >+#include <asm/msr.h> >+#include <asm/p2m.h> >+#include "mce.h" >+#include "x86_mca.h" >+ >+int vmce_init_msr(struct domain *d) >+{ >+ if ( dom_vmce(d) ) >+ { >+ dprintk(XENLOG_G_WARNING, "Domain %d has inited vMCE\n", >d->domain_id); >+ return 0; >+ } >+ >+ /* Allocate the vmca_msrs and mci_ctl togother */ >+ dom_vmce(d) = xmalloc(struct domain_mca_msrs); >+ if ( !dom_vmce(d) ) >+ return -ENOMEM; >+ >+ dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks); >+ if ( !dom_vmce(d)->mci_ctl ) >+ { >+ xfree(dom_vmce(d)); >+ return -ENOMEM; >+ } >+ memset(d->arch.vmca_msrs->mci_ctl, ~0, >+ sizeof(d->arch.vmca_msrs->mci_ctl)); >+ >+ dom_vmce(d)->mcg_status = 0x0; >+ dom_vmce(d)->mcg_cap = g_mcg_cap; >+ dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0; >+ dom_vmce(d)->nr_injection = 0; >+ >+ INIT_LIST_HEAD(&d->arch.vmca_msrs->impact_header); >+ spin_lock_init(&d->arch.vmca_msrs->lock); >+ >+ return 0; >+} >+ >+/* >+ * Caller should make sure msr is bank msr */ >+static 
int bank_mce_rdmsr(struct domain *d, uint32_t msr, uint64_t *val) >+{ >+ int bank, ret = 1; >+ struct domain_mca_msrs *vmce; >+ struct bank_entry *entry = NULL; >+ >+ if (!d) >+ return -EINVAL; >+ vmce = dom_vmce(d); >+ ASSERT(vmce); >+ >+ bank = (msr - MSR_IA32_MC0_CTL) / 4; >+ if (bank >= nr_mce_banks) >+ return -1; >+ >+ switch (msr & (MSR_IA32_MC0_CTL | 3)) >+ { >+ case MSR_IA32_MC0_CTL: >+ *val = vmce->mci_ctl[bank] & >+ (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); >+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", >+ bank, *val); >+ break; >+ case MSR_IA32_MC0_STATUS: >+ /* Only error bank is read. Non-error banks simply return. */ >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if (entry->bank == bank) { >+ *val = entry->mci_status; >+ mce_printk(MCE_VERBOSE, >+ "MCE: rd MC%u_STATUS in vMCE# context " >+ "value 0x%"PRIx64"\n", bank, *val); >+ } >+ else >+ entry = NULL; >+ } >+ break; >+ case MSR_IA32_MC0_ADDR: >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->bank == bank ) >+ { >+ *val = entry->mci_addr; >+ mce_printk(MCE_VERBOSE, >+ "MCE: rdmsr MC%u_ADDR in vMCE# context " >+ "0x%"PRIx64"\n", bank, *val); >+ } >+ } >+ break; >+ case MSR_IA32_MC0_MISC: >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->bank == bank ) >+ { >+ *val = entry->mci_misc; >+ mce_printk(MCE_VERBOSE, >+ "MCE: rd MC%u_MISC in vMCE# context " >+ "0x%"PRIx64"\n", bank, *val); >+ } >+ } >+ break; >+ default: >+ switch ( boot_cpu_data.x86_vendor ) >+ { >+ case X86_VENDOR_INTEL: >+ ret = intel_mce_rdmsr(msr, val); >+ break; >+ default: >+ ret = 0; >+ break; >+ } >+ break; >+ } >+ >+ return ret; >+} >+ >+/* >+ * < 0: Unsupported and will #GP fault to guest >+ * = 0: Not handled, should be handled by other components >+ * > 
0: Success >+ */ >+int vmce_rdmsr(uint32_t msr, uint64_t *val) >+{ >+ struct domain *d = current->domain; >+ struct domain_mca_msrs *vmce; >+ int ret = 1; >+ >+ *val = 0; >+ >+ vmce = dom_vmce(d); >+ if ( !vmce ) >+ { >+ /* XXX more handle here */ >+ return 0; >+ } >+ >+ spin_lock(&d->arch.vmca_msrs->lock); >+ >+ switch ( msr ) >+ { >+ case MSR_IA32_MCG_STATUS: >+ *val = vmce->mcg_status; >+ if (*val) >+ mce_printk(MCE_VERBOSE, >+ "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val); >+ break; >+ case MSR_IA32_MCG_CAP: >+ *val = vmce->mcg_cap; >+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", >+ *val); >+ break; >+ case MSR_IA32_MCG_CTL: >+ /* Always 0 if no CTL support */ >+ *val = vmce->mcg_ctl & h_mcg_ctl; >+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", >+ *val); >+ break; >+ default: >+ if ( mce_bank_msr(msr) ) >+ ret = bank_mce_rdmsr(d, msr, val); >+ else >+ ret = 0; >+ break; >+ } >+ >+ spin_unlock(&d->arch.vmca_msrs->lock); >+ return ret; >+} >+ >+int bank_mce_wrmsr(struct domain *d, u32 msr, u64 val) >+{ >+ int bank, ret = 1; >+ struct domain_mca_msrs *vmce; >+ struct bank_entry *entry = NULL; >+ >+ if (!d) >+ return -EINVAL; >+ vmce = dom_vmce(d); >+ ASSERT(vmce && vmce->mci_ctl); >+ >+ bank = (msr - MSR_IA32_MC0_CTL) / 4; >+ if (bank >= nr_mce_banks) >+ return -EINVAL; >+ >+ switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >+ { >+ case MSR_IA32_MC0_CTL: >+ vmce->mci_ctl[bank] = val; >+ break; >+ case MSR_IA32_MC0_STATUS: >+ /* Give the first entry of the list, it corresponds to current >+ * vMCE# injection. When vMCE# is finished processing by the >+ * the guest, this node will be deleted. >+ * Only error bank is written. Non-error banks simply return. 
>+ */ >+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) >+ { >+ entry = list_entry(d->arch.vmca_msrs->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->bank == bank ) >+ entry->mci_status = val; >+ mce_printk(MCE_VERBOSE, >+ "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", >+ bank, val); >+ } >+ else >+ mce_printk(MCE_VERBOSE, >+ "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); >+ break; >+ case MSR_IA32_MC0_ADDR: >+ mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", >bank); >+ ret = -1; >+ break; >+ case MSR_IA32_MC0_MISC: >+ mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); >+ ret = -1; >+ break; >+ default: >+ switch ( boot_cpu_data.x86_vendor ) >+ { >+ case X86_VENDOR_INTEL: >+ ret = intel_mce_wrmsr(msr, val); >+ break; >+ default: >+ ret = 0; >+ break; >+ } >+ break; >+ } >+ >+ return ret; >+} >+ >+/* >+ * < 0: Unsupported and will #GP fault to guest >+ * = 0: Not handled, should be handled by other components >+ * > 0: Success >+ */ >+int vmce_wrmsr(u32 msr, u64 val) >+{ >+ struct domain *d = current->domain; >+ struct bank_entry *entry = NULL; >+ struct domain_mca_msrs *vmce; >+ int ret = 1; >+ >+ if ( !g_mcg_cap ) >+ return 0; >+ >+ vmce = dom_vmce(d); >+ spin_lock(&vmce->lock); >+ >+ switch ( msr ) >+ { >+ case MSR_IA32_MCG_CTL: >+ vmce->mcg_ctl = val; >+ break; >+ case MSR_IA32_MCG_STATUS: >+ vmce->mcg_status = val; >+ mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", >val); >+ /* For HVM guest, this is the point for deleting vMCE injection node */ >+ if ( d->is_hvm && (vmce->nr_injection > 0) ) >+ { >+ vmce->nr_injection--; /* Should be 0 */ >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->mci_status & MCi_STATUS_VAL ) >+ mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should >have " >+ "been cleared before write MCG_STATUS >MSR\n"); >+ >+ mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " >+ "Node, nr_injection %u\n", >+ 
vmce->nr_injection); >+ list_del(&entry->list); >+ xfree(entry); >+ } >+ else >+ mce_printk(MCE_QUIET, "MCE: Not found HVM guest" >+ " last injection Node, something Wrong!\n"); >+ } >+ break; >+ case MSR_IA32_MCG_CAP: >+ mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); >+ ret = -1; >+ break; >+ default: >+ if ( mce_bank_msr(msr) ) >+ ret = bank_mce_wrmsr(d, msr, val); >+ else >+ ret = 0; >+ break; >+ } >+ >+ spin_unlock(&vmce->lock); >+ return ret; >+} >+ >+int inject_vmce(struct domain *d) >+{ >+ int cpu = smp_processor_id(); >+ cpumask_t affinity; >+ >+ /* PV guest and HVM guest have different vMCE# injection >+ * methods*/ >+ if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) >+ { >+ if (d->is_hvm) >+ { >+ mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM >DOM %d\n", >+ d->domain_id); >+ vcpu_kick(d->vcpu[0]); >+ } >+ /* PV guest including DOM0 */ >+ else >+ { >+ mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", >+ d->domain_id); >+ if (guest_has_trap_callback >+ (d, 0, TRAP_machine_check)) >+ { >+ d->vcpu[0]->cpu_affinity_tmp >+ d->vcpu[0]->cpu_affinity; >+ cpus_clear(affinity); >+ cpu_set(cpu, affinity); >+ mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, >old %d\n", cpu, >+ d->vcpu[0]->processor); >+ vcpu_set_affinity(d->vcpu[0], &affinity); >+ vcpu_kick(d->vcpu[0]); >+ } >+ else >+ { >+ mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE >handler\n"); >+ domain_crash(d); >+ } >+ } >+ } >+ else { >+ /* new vMCE comes while first one has not been injected yet, >+ * in this case, inject fail. [We can''t lose this vMCE for >+ * the mce node''s consistency]. >+ */ >+ mce_printk(MCE_QUIET, "There''s a pending vMCE waiting to be injected >" >+ " to this DOM%d!\n", d->domain_id); >+ return -1; >+ } >+ return 0; >+} >+ >+/* This node list records errors impacting a domain. when one >+ * MCE# happens, one error bank impacts a domain. This error node >+ * will be inserted to the tail of the per_dom data for vMCE# MSR >+ * virtualization. 
When one vMCE# injection is finished processing >+ * processed by guest, the corresponding node will be deleted. >+ * This node list is for GUEST vMCE# MSRS virtualization. >+ */ >+static struct bank_entry* alloc_bank_entry(void) { >+ struct bank_entry *entry; >+ >+ entry = xmalloc(struct bank_entry); >+ if (!entry) { >+ printk(KERN_ERR "MCE: malloc bank_entry failed\n"); >+ return NULL; >+ } >+ memset(entry, 0x0, sizeof(entry)); >+ INIT_LIST_HEAD(&entry->list); >+ return entry; >+} >+ >+/* Fill error bank info for #vMCE injection and GUEST vMCE# >+ * MSR virtualization data >+ * 1) Log down how many nr_injections of the impacted. >+ * 2) Copy MCE# error bank to impacted DOM node list, >+ for vMCE# MSRs virtualization >+*/ >+ >+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, >+ uint64_t gstatus) { >+ struct bank_entry *entry; >+ >+ /* This error bank impacts one domain, we need to fill domain related >+ * data for vMCE MSRs virtualization and vMCE# injection */ >+ if (mc_bank->mc_domid != (uint16_t)~0) { >+ /* For HVM guest, Only when first vMCE is consumed by HVM guest >successfully, >+ * will we generete another node and inject another vMCE >+ */ >+ if ( (d->is_hvm) && (d->arch.vmca_msrs->nr_injection > 0) ) >+ { >+ mce_printk(MCE_QUIET, "MCE: HVM guest has not handled >previous" >+ " vMCE yet!\n"); >+ return -1; >+ } >+ entry = alloc_bank_entry(); >+ if (entry == NULL) >+ return -1; >+ >+ entry->mci_status = mc_bank->mc_status; >+ entry->mci_addr = mc_bank->mc_addr; >+ entry->mci_misc = mc_bank->mc_misc; >+ entry->bank = mc_bank->mc_bank; >+ >+ spin_lock(&d->arch.vmca_msrs->lock); >+ /* New error Node, insert to the tail of the per_dom data */ >+ list_add_tail(&entry->list, &d->arch.vmca_msrs->impact_header); >+ /* Fill MSR global status */ >+ d->arch.vmca_msrs->mcg_status = gstatus; >+ /* New node impact the domain, need another vMCE# injection*/ >+ d->arch.vmca_msrs->nr_injection++; >+ spin_unlock(&d->arch.vmca_msrs->lock); >+ >+ 
mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " >+ "status %"PRIx64" addr %"PRIx64" domid %d]\n ", >+ mc_bank->mc_bank, mc_bank->mc_status, >mc_bank->mc_addr, >+ mc_bank->mc_domid); >+ } >+ return 0; >+} >+ >+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct >mcinfo_global *global) >+{ >+ int ret; >+ >+ ret = fill_vmsr_data(bank, d, global->mc_gstatus); >+ if (ret < 0) >+ return ret; >+ >+ return inject_vmce(d); >+} >+ >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/domain.c >--- a/xen/arch/x86/domain.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/domain.c Fri Apr 16 18:55:03 2010 +0800 >@@ -49,6 +49,7 @@ > #include <asm/msr.h> > #include <asm/traps.h> > #include <asm/nmi.h> >+#include <asm/mce.h> > #include <xen/numa.h> > #include <xen/iommu.h> > #ifdef CONFIG_COMPAT >@@ -501,7 +502,7 @@ int arch_domain_create(struct domain *d, > goto fail; > > /* For Guest vMCE MSRs virtualization */ >- mce_init_msr(d); >+ vmce_init_msr(d); > } > > if ( is_hvm_domain(d) ) >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/hvm/hvm.c >--- a/xen/arch/x86/hvm/hvm.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/hvm/hvm.c Fri Apr 16 18:55:03 2010 +0800 >@@ -47,6 +47,7 @@ > #include <asm/traps.h> > #include <asm/mc146818rtc.h> > #include <asm/spinlock.h> >+#include <asm/mce.h> > #include <asm/hvm/hvm.h> > #include <asm/hvm/vpt.h> > #include <asm/hvm/support.h> >@@ -2061,7 +2062,7 @@ int hvm_msr_read_intercept(struct cpu_us > break; > > default: >- ret = mce_rdmsr(ecx, &msr_content); >+ ret = vmce_rdmsr(ecx, &msr_content); > if ( ret < 0 ) > goto gp_fault; > else if ( ret ) >@@ -2160,7 +2161,7 @@ int hvm_msr_write_intercept(struct cpu_u > break; > > default: >- ret = mce_wrmsr(ecx, msr_content); >+ ret = vmce_wrmsr(ecx, msr_content); > if ( ret < 0 ) > goto gp_fault; > else if ( ret ) >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/traps.c >--- a/xen/arch/x86/traps.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/traps.c Fri Apr 16 
18:55:03 2010 +0800 >@@ -65,6 +65,7 @@ > #include <asm/traps.h> > #include <asm/hvm/vpt.h> > #include <asm/hypercall.h> >+#include <asm/mce.h> > #include <public/arch-x86/cpuid.h> > > /* >@@ -2295,7 +2296,7 @@ static int emulate_privileged_op(struct > if ( wrmsr_hypervisor_regs(regs->ecx, val) ) > break; > >- rc = mce_wrmsr(regs->ecx, val); >+ rc = vmce_wrmsr(regs->ecx, val); > if ( rc < 0 ) > goto fail; > if ( rc ) >@@ -2388,7 +2389,7 @@ static int emulate_privileged_op(struct > break; > } > >- rc = mce_rdmsr(regs->ecx, &val); >+ rc = vmce_rdmsr(regs->ecx, &val); > if ( rc < 0 ) > goto fail; > if ( rc ) >@@ -2947,19 +2948,19 @@ void async_exception_cleanup(struct vcpu > { > struct domain *d = curr->domain; > >- if ( !d->arch.vmca_msrs.nr_injection ) >+ if ( !d->arch.vmca_msrs->nr_injection ) > { > printk(XENLOG_WARNING "MCE: ret from vMCE#, " > "no injection node\n"); > goto end; > } > >- d->arch.vmca_msrs.nr_injection--; >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >+ d->arch.vmca_msrs->nr_injection--; >+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) > { > struct bank_entry *entry; > >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >+ entry = list_entry(d->arch.vmca_msrs->impact_header.next, > struct bank_entry, list); > gdprintk(XENLOG_DEBUG, "MCE: delete last injection >node\n"); > list_del(&entry->list); >@@ -2968,7 +2969,7 @@ void async_exception_cleanup(struct vcpu > printk(XENLOG_ERR "MCE: didn''t found last injection node\n"); > > /* further injection */ >- if ( d->arch.vmca_msrs.nr_injection > 0 && >+ if ( d->arch.vmca_msrs->nr_injection > 0 && > guest_has_trap_callback(d, 0, TRAP_machine_check) && > !test_and_set_bool(curr->mce_pending) ) > { >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/common/domain.c >--- a/xen/common/domain.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/common/domain.c Fri Apr 16 18:55:03 2010 +0800 >@@ -616,6 +616,8 @@ static void complete_domain_destroy(stru > > xfree(d->pirq_mask); > 
xfree(d->pirq_to_evtchn); >+ xfree(dom_vmce(d)->mci_ctl); >+ xfree(dom_vmce(d)); > > xsm_free_security_domain(d); > free_domain_struct(d); >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/domain.h >--- a/xen/include/asm-x86/domain.h Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/include/asm-x86/domain.h Fri Apr 16 18:55:03 2010 +0800 >@@ -6,6 +6,7 @@ > #include <asm/hvm/vcpu.h> > #include <asm/hvm/domain.h> > #include <asm/e820.h> >+#include <asm/mce.h> > #include <public/vcpu.h> > > #define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo) >@@ -214,32 +215,6 @@ typedef xen_domctl_cpuid_t cpuid_input_t > typedef xen_domctl_cpuid_t cpuid_input_t; > > struct p2m_domain; >- >-/* Define for GUEST MCA handling */ >-#define MAX_NR_BANKS 30 >- >-/* This entry is for recording bank nodes for the impacted domain, >- * put into impact_header list. */ >-struct bank_entry { >- struct list_head list; >- uint16_t bank; >- uint64_t mci_status; >- uint64_t mci_addr; >- uint64_t mci_misc; >-}; >- >-struct domain_mca_msrs >-{ >- /* Guest should not change below values after DOM boot up */ >- uint64_t mcg_cap; >- uint64_t mcg_ctl; >- uint64_t mcg_status; >- uint64_t mci_ctl[MAX_NR_BANKS]; >- uint16_t nr_injection; >- struct list_head impact_header; >- spinlock_t lock; >-}; >- > struct time_scale { > int shift; > u32 mul_frac; >@@ -311,7 +286,7 @@ struct arch_domain > cpuid_input_t cpuids[MAX_CPUID_INPUT]; > > /* For Guest vMCA handling */ >- struct domain_mca_msrs vmca_msrs; >+ struct domain_mca_msrs *vmca_msrs; > > /* TSC management (emulation, pv, scaling, stats) */ > int tsc_mode; /* see include/asm-x86/time.h */ >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/mce.h >--- /dev/null Thu Jan 01 00:00:00 1970 +0000 >+++ b/xen/include/asm-x86/mce.h Fri Apr 16 18:55:03 2010 +0800 >@@ -0,0 +1,36 @@ >+#include <xen/types.h> >+#include <public/arch-x86/xen-mca.h> >+#ifndef _XEN_X86_MCE_H >+#define _XEN_X86_MCE_H >+/* Define for GUEST MCA handling */ >+#define MAX_NR_BANKS 30 
>+ >+/* This entry is for recording bank nodes for the impacted domain, >+ * put into impact_header list. */ >+struct bank_entry { >+ struct list_head list; >+ uint16_t bank; >+ uint64_t mci_status; >+ uint64_t mci_addr; >+ uint64_t mci_misc; >+}; >+ >+struct domain_mca_msrs >+{ >+ /* Guest should not change below values after DOM boot up */ >+ uint64_t mcg_cap; >+ uint64_t mcg_ctl; >+ uint64_t mcg_status; >+ uint64_t *mci_ctl; >+ uint16_t nr_injection; >+ struct list_head impact_header; >+ spinlock_t lock; >+}; >+ >+#define dom_vmce(x) ((x)->arch.vmca_msrs) >+ >+/* Guest vMCE MSRs virtualization */ >+extern int vmce_init_msr(struct domain *d); >+extern int vmce_wrmsr(uint32_t msr, uint64_t val); >+extern int vmce_rdmsr(uint32_t msr, uint64_t *val); >+#endif >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/traps.h >--- a/xen/include/asm-x86/traps.h Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/include/asm-x86/traps.h Fri Apr 16 18:55:03 2010 +0800 >@@ -49,9 +49,4 @@ extern int send_guest_trap(struct domain > extern int send_guest_trap(struct domain *d, uint16_t vcpuid, > unsigned int trap_nr); > >-/* Guest vMCE MSRs virtualization */ >-extern void mce_init_msr(struct domain *d); >-extern int mce_wrmsr(uint32_t msr, uint64_t val); >-extern int mce_rdmsr(uint32_t msr, uint64_t *val); >- > #endif /* ASM_TRAP_H */ > > > >_______________________________________________ >Xen-devel mailing list >Xen-devel@lists.xensource.com >http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel