Hi, Jan
Thanks for the fix/clearnup!
It's fine to solve the bank hard coded problem this way.
Regards,
Criping
-----Original Message-----
From: Jan Beulich [mailto:jbeulich@novell.com]
Sent: 2009年3月28日 0:38
To: xen-devel@lists.xensource.com
Cc: Ke, Liping; Jiang, Yunhong
Subject: [PATCH] fix and clean up c/s 19423
- fix inverted return value check for intel_mce_{rd,wr}msr()
- fix broken initialization of d->arch.vmca_msrs.mci_ctl
- remove pointless (!d || is_idle_domain(d)) checks
- eliminate hard-coded limit to 9 banks
- avoid redundant gdprintk()s
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- 2009-03-27.orig/xen/arch/x86/cpu/mcheck/mce_intel.c 2009-03-27
12:06:52.000000000 +0100
+++ 2009-03-27/xen/arch/x86/cpu/mcheck/mce_intel.c 2009-03-27
12:39:44.000000000 +0100
@@ -737,7 +737,7 @@ void mce_intel_feature_init(struct cpuin
intel_init_cmci(c);
}
-uint64_t g_mcg_cap;
+static uint64_t g_mcg_cap;
static void mce_cap_init(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -749,9 +749,12 @@ static void mce_cap_init(struct cpuinfo_
if ((l & MCG_CMCI_P) && cpu_has_apic)
cmci_support = 1;
- nr_mce_banks = l & 0xff;
+ nr_mce_banks = l & MCG_CAP_COUNT;
if (nr_mce_banks > MAX_NR_BANKS)
+ {
printk(KERN_WARNING "MCE: exceed max mce banks\n");
+ g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
+ }
if (l & MCG_EXT_P)
{
nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
@@ -823,11 +826,22 @@ int intel_mcheck_init(struct cpuinfo_x86
}
/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
-int intel_mce_wrmsr(u32 msr, u32 lo, u32 hi)
+void intel_mce_init_msr(struct domain *d)
+{
+ d->arch.vmca_msrs.mcg_status = 0x0;
+ d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
+ d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0;
+ d->arch.vmca_msrs.nr_injection = 0;
+ memset(d->arch.vmca_msrs.mci_ctl, ~0,
+ sizeof(d->arch.vmca_msrs.mci_ctl));
+ INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
+}
+
+int intel_mce_wrmsr(u32 msr, u64 value)
{
struct domain *d = current->domain;
struct bank_entry *entry = NULL;
- uint64_t value = (u64)hi << 32 | lo;
+ unsigned int bank;
int ret = 1;
spin_lock(&mce_locks);
@@ -835,86 +849,71 @@ int intel_mce_wrmsr(u32 msr, u32 lo, u32
{
case MSR_IA32_MCG_CTL:
if (value != (u64)~0x0 && value != 0x0) {
- gdprintk(XENLOG_WARNING, "MCE: value writen to MCG_CTL"
+ gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL"
"should be all 0s or 1s\n");
ret = -1;
break;
}
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context,
skip\n");
- break;
- }
d->arch.vmca_msrs.mcg_ctl = value;
break;
case MSR_IA32_MCG_STATUS:
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context,
skip\n");
- break;
- }
d->arch.vmca_msrs.mcg_status = value;
gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_CTL
%"PRIx64"\n", value);
break;
- case MSR_IA32_MC0_CTL2:
- case MSR_IA32_MC1_CTL2:
- case MSR_IA32_MC2_CTL2:
- case MSR_IA32_MC3_CTL2:
- case MSR_IA32_MC4_CTL2:
- case MSR_IA32_MC5_CTL2:
- case MSR_IA32_MC6_CTL2:
- case MSR_IA32_MC7_CTL2:
- case MSR_IA32_MC8_CTL2:
+ case MSR_IA32_MCG_CAP:
+ gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n");
+ ret = -1;
+ break;
+ case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, "
"Guest should not write this MSR!\n");
break;
- case MSR_IA32_MC0_CTL:
- case MSR_IA32_MC1_CTL:
- case MSR_IA32_MC2_CTL:
- case MSR_IA32_MC3_CTL:
- case MSR_IA32_MC4_CTL:
- case MSR_IA32_MC5_CTL:
- case MSR_IA32_MC6_CTL:
- case MSR_IA32_MC7_CTL:
- case MSR_IA32_MC8_CTL:
- if (value != (u64)~0x0 && value != 0x0) {
- gdprintk(XENLOG_WARNING, "MCE: value writen to MCi_CTL"
- "should be all 0s or 1s\n");
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
+ bank = (msr - MSR_IA32_MC0_CTL) / 4;
+ if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) {
+ gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n",
bank);
ret = -1;
break;
}
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context,
skip\n");
+ switch (msr & (MSR_IA32_MC0_CTL | 3))
+ {
+ case MSR_IA32_MC0_CTL:
+ if (value != (u64)~0x0 && value != 0x0) {
+ gdprintk(XENLOG_WARNING, "MCE: value written to
MC%u_CTL"
+ "should be all 0s or 1s (is
%"PRIx64")\n",
+ bank, value);
+ ret = -1;
+ break;
+ }
+ d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value;
break;
- }
- d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value;
- break;
- case MSR_IA32_MC0_STATUS:
- case MSR_IA32_MC1_STATUS:
- case MSR_IA32_MC2_STATUS:
- case MSR_IA32_MC3_STATUS:
- case MSR_IA32_MC4_STATUS:
- case MSR_IA32_MC5_STATUS:
- case MSR_IA32_MC6_STATUS:
- case MSR_IA32_MC7_STATUS:
- case MSR_IA32_MC8_STATUS:
- if (!d || is_idle_domain(d)) {
- /* Just skip */
- gdprintk(XENLOG_WARNING, "mce wrmsr: not in domain
context!\n");
+ case MSR_IA32_MC0_STATUS:
+ /* Give the first entry of the list, it corresponds to current
+ * vMCE# injection. When vMCE# is finished processing by the
+ * the guest, this node will be deleted.
+ * Only error bank is written. Non-error banks simply return.
+ */
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == bank )
+ entry->mci_status = value;
+ gdprintk(XENLOG_DEBUG,
+ "MCE: wr MC%u_STATUS %"PRIx64" in
vMCE#\n",
+ bank, value);
+ } else
+ gdprintk(XENLOG_DEBUG,
+ "MCE: wr MC%u_STATUS %"PRIx64"\n",
bank, value);
+ break;
+ case MSR_IA32_MC0_ADDR:
+ gdprintk(XENLOG_WARNING, "MCE: MC%u_ADDR is read-only\n",
bank);
+ ret = -1;
+ break;
+ case MSR_IA32_MC0_MISC:
+ gdprintk(XENLOG_WARNING, "MCE: MC%u_MISC is read-only\n",
bank);
+ ret = -1;
break;
}
- /* Give the first entry of the list, it corresponds to current
- * vMCE# injection. When vMCE# is finished processing by the
- * the guest, this node will be deleted.
- * Only error bank is written. Non-error bank simply return.
- */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == (msr - MSR_IA32_MC0_STATUS)/4 ) {
- entry->mci_status = value;
- }
- gdprintk(XENLOG_DEBUG, "MCE: wmrsr mci_status in vMCE#
context\n");
- }
- gdprintk(XENLOG_DEBUG, "MCE: wrmsr mci_status
val:%"PRIx64"\n", value);
break;
default:
ret = 0;
@@ -928,6 +927,7 @@ int intel_mce_rdmsr(u32 msr, u32 *lo, u3
{
struct domain *d = current->domain;
int ret = 1;
+ unsigned int bank;
struct bank_entry *entry = NULL;
*lo = *hi = 0x0;
@@ -935,142 +935,82 @@ int intel_mce_rdmsr(u32 msr, u32 *lo, u3
switch(msr)
{
case MSR_IA32_MCG_STATUS:
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain
context!\n");
- *lo = *hi = 0x0;
- break;
- }
*lo = (u32)d->arch.vmca_msrs.mcg_status;
*hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32);
gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n",
*lo, *hi);
break;
case MSR_IA32_MCG_CAP:
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain
context!\n");
- *lo = *hi = 0x0;
- break;
- }
*lo = (u32)d->arch.vmca_msrs.mcg_cap;
*hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32);
gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n",
*lo, *hi);
break;
case MSR_IA32_MCG_CTL:
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain
context!\n");
- *lo = *hi = 0x0;
- break;
- }
*lo = (u32)d->arch.vmca_msrs.mcg_ctl;
*hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32);
gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n",
*lo, *hi);
break;
- case MSR_IA32_MC0_CTL2:
- case MSR_IA32_MC1_CTL2:
- case MSR_IA32_MC2_CTL2:
- case MSR_IA32_MC3_CTL2:
- case MSR_IA32_MC4_CTL2:
- case MSR_IA32_MC5_CTL2:
- case MSR_IA32_MC6_CTL2:
- case MSR_IA32_MC7_CTL2:
- case MSR_IA32_MC8_CTL2:
+ case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, "
"Guest should not read this MSR!\n");
break;
- case MSR_IA32_MC0_CTL:
- case MSR_IA32_MC1_CTL:
- case MSR_IA32_MC2_CTL:
- case MSR_IA32_MC3_CTL:
- case MSR_IA32_MC4_CTL:
- case MSR_IA32_MC5_CTL:
- case MSR_IA32_MC6_CTL:
- case MSR_IA32_MC7_CTL:
- case MSR_IA32_MC8_CTL:
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain
context!\n");
- *lo = *hi = 0x0;
- break;
- }
- *lo = (u32)d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4];
- *hi - (u32)(d->arch.vmca_msrs.mci_ctl[(msr -
MSR_IA32_MC0_CTL)/4]
- >> 32);
- gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_CTL lo %x hi %x\n",
*lo, *hi);
- break;
- case MSR_IA32_MC0_STATUS:
- case MSR_IA32_MC1_STATUS:
- case MSR_IA32_MC2_STATUS:
- case MSR_IA32_MC3_STATUS:
- case MSR_IA32_MC4_STATUS:
- case MSR_IA32_MC5_STATUS:
- case MSR_IA32_MC6_STATUS:
- case MSR_IA32_MC7_STATUS:
- case MSR_IA32_MC8_STATUS:
- /* Only error bank is read. Non-error bank simply return */
- *lo = *hi = 0x0;
- gdprintk(XENLOG_DEBUG, "MCE: rdmsr mci_status\n");
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "mce_rdmsr: not in domain
context!\n");
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
+ bank = (msr - MSR_IA32_MC0_CTL) / 4;
+ if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) {
+ gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n",
bank);
+ ret = -1;
break;
}
- if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == (msr - MSR_IA32_MC0_STATUS)/4 ) {
- *lo = entry->mci_status;
- *hi = entry->mci_status >> 32;
- gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_STATUS in vmCE#
context "
- "lo %x hi %x\n", *lo, *hi);
+ switch (msr & (MSR_IA32_MC0_CTL | 3))
+ {
+ case MSR_IA32_MC0_CTL:
+ *lo = (u32)d->arch.vmca_msrs.mci_ctl[bank];
+ *hi = (u32)(d->arch.vmca_msrs.mci_ctl[bank] >> 32);
+ gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_CTL lo %x hi %x\n",
+ bank, *lo, *hi);
+ break;
+ case MSR_IA32_MC0_STATUS:
+ /* Only error bank is read. Non-error banks simply return. */
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if (entry->bank == bank) {
+ *lo = entry->mci_status;
+ *hi = entry->mci_status >> 32;
+ gdprintk(XENLOG_DEBUG,
+ "MCE: rd MC%u_STATUS in vmCE# context "
+ "lo %x hi %x\n", bank, *lo, *hi);
+ } else
+ entry = NULL;
}
- }
- break;
- case MSR_IA32_MC0_ADDR:
- case MSR_IA32_MC1_ADDR:
- case MSR_IA32_MC2_ADDR:
- case MSR_IA32_MC3_ADDR:
- case MSR_IA32_MC4_ADDR:
- case MSR_IA32_MC5_ADDR:
- case MSR_IA32_MC6_ADDR:
- case MSR_IA32_MC7_ADDR:
- case MSR_IA32_MC8_ADDR:
- *lo = *hi = 0x0;
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "mce_rdmsr: not in domain
context!\n");
+ if (!entry)
+ gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_STATUS\n",
bank);
break;
- }
- if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == (msr - MSR_IA32_MC0_ADDR)/4 ) {
- *lo = entry->mci_addr;
- *hi = entry->mci_addr >> 32;
- gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_ADDR in vMCE#
context "
- "lo %x hi %x\n", *lo, *hi);
+ case MSR_IA32_MC0_ADDR:
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if (entry->bank == bank) {
+ *lo = entry->mci_addr;
+ *hi = entry->mci_addr >> 32;
+ gdprintk(XENLOG_DEBUG,
+ "MCE: rd MC%u_ADDR in vMCE# context lo %x hi
%x\n",
+ bank, *lo, *hi);
+ }
}
- }
- break;
- case MSR_IA32_MC0_MISC:
- case MSR_IA32_MC1_MISC:
- case MSR_IA32_MC2_MISC:
- case MSR_IA32_MC3_MISC:
- case MSR_IA32_MC4_MISC:
- case MSR_IA32_MC5_MISC:
- case MSR_IA32_MC6_MISC:
- case MSR_IA32_MC7_MISC:
- case MSR_IA32_MC8_MISC:
- *lo = *hi = 0x0;
- if (!d || is_idle_domain(d)) {
- gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain
context!\n");
break;
- }
- if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == (msr - MSR_IA32_MC0_MISC)/4 ) {
- *lo = entry->mci_misc;
- *hi = entry->mci_misc >> 32;
- gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_MISC in vMCE#
context "
- " lo %x hi %x\n", *lo, *hi);
+ case MSR_IA32_MC0_MISC:
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if (entry->bank == bank) {
+ *lo = entry->mci_misc;
+ *hi = entry->mci_misc >> 32;
+ gdprintk(XENLOG_DEBUG,
+ "MCE: rd MC%u_MISC in vMCE# context lo %x hi
%x\n",
+ bank, *lo, *hi);
+ }
}
+ break;
}
break;
default:
--- 2009-03-27.orig/xen/arch/x86/domain.c 2009-03-27 13:20:57.000000000
+0100
+++ 2009-03-27/xen/arch/x86/domain.c 2009-03-27 13:21:43.000000000 +0100
@@ -46,6 +46,7 @@
#include <asm/hvm/support.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
+#include <asm/traps.h>
#include <asm/nmi.h>
#include <xen/numa.h>
#include <xen/iommu.h>
@@ -373,7 +374,6 @@ void vcpu_destroy(struct vcpu *v)
hvm_vcpu_destroy(v);
}
-extern uint64_t g_mcg_cap;
int arch_domain_create(struct domain *d, unsigned int domcr_flags)
{
#ifdef __x86_64__
@@ -458,14 +458,8 @@ int arch_domain_create(struct domain *d,
goto fail;
/* For Guest vMCE MSRs virtualization */
- d->arch.vmca_msrs.mcg_status = 0x0;
- d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
- d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0;
- d->arch.vmca_msrs.nr_injection = 0;
- memset(d->arch.vmca_msrs.mci_ctl, 0x1,
- sizeof(d->arch.vmca_msrs.mci_ctl));
- INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
-
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ intel_mce_init_msr(d);
}
if ( is_hvm_domain(d) )
--- 2009-03-27.orig/xen/arch/x86/traps.c 2009-03-27 10:10:57.000000000
+0100
+++ 2009-03-27/xen/arch/x86/traps.c 2009-03-27 10:31:05.000000000 +0100
@@ -1637,10 +1637,6 @@ static int is_cpufreq_controller(struct
(d->domain_id == 0));
}
-/*Intel vMCE MSRs virtualization*/
-extern int intel_mce_wrmsr(u32 msr, u32 lo, u32 hi);
-extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
-
static int emulate_privileged_op(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -2210,10 +2206,10 @@ static int emulate_privileged_op(struct
break;
if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
{
- int rc = intel_mce_wrmsr(regs->ecx, eax, edx);
- if ( rc == -1 )
+ int rc = intel_mce_wrmsr(regs->ecx, res);
+ if ( rc < 0 )
goto fail;
- if ( rc == 0 )
+ if ( rc )
break;
}
@@ -2291,25 +2287,27 @@ static int emulate_privileged_op(struct
default:
if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
{
+ rdmsr_writeback:
regs->eax = l;
regs->edx = h;
break;
}
- /* Everyone can read the MSR space. */
- /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR
%p.\n",
- _p(regs->ecx));*/
- if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
- goto fail;
if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
{
- int rc = intel_mce_rdmsr(regs->ecx, &eax, &edx);
- if ( rc == -1 )
+ int rc = intel_mce_rdmsr(regs->ecx, &l, &h);
+
+ if ( rc < 0 )
goto fail;
- if ( rc == 0 )
- break;
+ if ( rc )
+ goto rdmsr_writeback;
}
+ /* Everyone can read the MSR space. */
+ /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR
%p.\n",
+ _p(regs->ecx));*/
+ if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
+ goto fail;
break;
}
break;
--- 2009-03-27.orig/xen/include/asm-x86/traps.h 2009-03-27 13:20:57.000000000
+0100
+++ 2009-03-27/xen/include/asm-x86/traps.h 2009-03-27 10:48:02.000000000
+0100
@@ -47,4 +47,9 @@ extern int guest_has_trap_callback(struc
extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
unsigned int trap_nr);
+/* Intel vMCE MSRs virtualization */
+extern void intel_mce_init_msr(struct domain *d);
+extern int intel_mce_wrmsr(u32 msr, u64 value);
+extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
+
#endif /* ASM_TRAP_H */
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel