Boris Ostrovsky
2012-Jan-16 21:11 UTC
[PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
# HG changeset patch # User Boris Ostrovsky <boris.ostrovsky@amd.com> # Date 1326748089 -3600 # Node ID f157d40df95aa3b8becb970968d33f4eca6c7e75 # Parent 5b2676ac13218951698c49fa0350f2ac48220f3d x86/AMD: Add support for AMD''s OSVW feature in guests. In some cases guests should not provide workarounds for errata even when the physical processor is affected. For example, because of erratum 400 on family 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before going to idle in order to avoid getting stuck in a non-C0 state. This is not necessary: HLT and IO instructions are intercepted and therefore there is no reason for erratum 400 workaround in the guest. This patch allows us to present a guest with certain errata as fixed, regardless of the state of actual hardware. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com> diff -r 5b2676ac1321 -r f157d40df95a tools/libxc/xc_cpuid_x86.c --- a/tools/libxc/xc_cpuid_x86.c Mon Jan 09 16:01:44 2012 +0100 +++ b/tools/libxc/xc_cpuid_x86.c Mon Jan 16 22:08:09 2012 +0100 @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy( bitmaskof(X86_FEATURE_SSE4A) | bitmaskof(X86_FEATURE_MISALIGNSSE) | bitmaskof(X86_FEATURE_3DNOWPREFETCH) | + bitmaskof(X86_FEATURE_OSVW) | bitmaskof(X86_FEATURE_XOP) | bitmaskof(X86_FEATURE_FMA4) | bitmaskof(X86_FEATURE_TBM) | @@ -524,7 +525,6 @@ static void xc_cpuid_pv_policy( clear_bit(X86_FEATURE_RDTSCP, regs[3]); clear_bit(X86_FEATURE_SVM, regs[2]); - clear_bit(X86_FEATURE_OSVW, regs[2]); clear_bit(X86_FEATURE_IBS, regs[2]); clear_bit(X86_FEATURE_SKINIT, regs[2]); clear_bit(X86_FEATURE_WDT, regs[2]); diff -r 5b2676ac1321 -r f157d40df95a xen/arch/x86/cpu/amd.c --- a/xen/arch/x86/cpu/amd.c Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/arch/x86/cpu/amd.c Mon Jan 16 22:08:09 2012 +0100 @@ -32,6 +32,13 @@ static char opt_famrev[14]; string_param("cpuid_mask_cpu", opt_famrev); +/* + * Set osvw_len to higher number when updated Revision Guides + * are published and we know what the new status 
bits are + */ +static uint64_t osvw_length = 4, osvw_status; +static DEFINE_SPINLOCK(amd_lock); + static inline void wrmsr_amd(unsigned int index, unsigned int lo, unsigned int hi) { @@ -182,6 +189,35 @@ static void __devinit set_cpuidmask(cons } } +static void amd_guest_osvw_init(struct vcpu *vcpu) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + + /* + * Guests should see errata 400 and 415 as fixed (assuming that + * HLT and IO instructions are intercepted). + */ + vcpu->arch.amd.osvw.length = (osvw_length >= 3) ? (osvw_length) : 3; + vcpu->arch.amd.osvw.status = osvw_status & ~(6ULL); + + /* + * By increasing VCPU''s osvw.length to 3 we are telling the guest that + * all osvw.status bits inside that length, including bit 0 (which is + * reserved for erratum 298), are valid. However, if host processor''s + * osvw_len is 0 then osvw_status[0] carries no information. We need to + * be conservative here and therefore we tell the guest that erratum 298 + * is present (because we really don''t know). + */ + if (osvw_length == 0 && boot_cpu_data.x86 == 0x10) + vcpu->arch.amd.osvw.status |= 1; +} + +void amd_vcpu_initialise(struct vcpu *vcpu) +{ + amd_guest_osvw_init(vcpu); +} + /* * Check for the presence of an AMD erratum. Arguments are defined in amd.h * for each known erratum. Return 1 if erratum is found. @@ -512,6 +548,30 @@ static void __devinit init_amd(struct cp set_cpuidmask(c); check_syscfg_dram_mod_en(); + + /* + * Get OSVW bits. If bits are not the same on different processors then + * choose the worst case (i.e. if erratum is present on one processor and + * not on another assume that the erratum is present everywhere). 
+ */ + if (test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability)) { + uint64_t len, status; + + if (rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) || + rdmsr_safe(MSR_AMD_OSVW_STATUS, status)) + len = status = 0; + + spin_lock(&amd_lock); + + if (len < osvw_length) + osvw_length = len; + + osvw_status |= status; + osvw_status &= (1ULL << osvw_length) - 1; + + spin_unlock(&amd_lock); + } else + osvw_length = osvw_status = 0; } static struct cpu_dev amd_cpu_dev __cpuinitdata = { diff -r 5b2676ac1321 -r f157d40df95a xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/arch/x86/domain.c Mon Jan 16 22:08:09 2012 +0100 @@ -422,6 +422,10 @@ int vcpu_initialise(struct vcpu *v) if ( (rc = vcpu_init_fpu(v)) != 0 ) return rc; + /* Vendor-specific initialization */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_vcpu_initialise(v); + if ( is_hvm_domain(d) ) { rc = hvm_vcpu_initialise(v); diff -r 5b2676ac1321 -r f157d40df95a xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jan 16 22:08:09 2012 +0100 @@ -1044,6 +1044,27 @@ static void svm_init_erratum_383(struct } } +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, uint read) +{ + uint eax, ebx, ecx, edx; + + /* Guest OSVW support */ + hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!test_bit((X86_FEATURE_OSVW & 31), &ecx)) + return -1; + + if (read) { + if (msr == MSR_AMD_OSVW_ID_LENGTH) + *val = v->arch.amd.osvw.length; + else + *val = v->arch.amd.osvw.status; + } + /* Writes are ignored */ + + return 0; +} + + static int svm_cpu_up(void) { uint64_t msr_content; @@ -1385,6 +1406,13 @@ static int svm_msr_read_intercept(unsign vpmu_do_rdmsr(msr, msr_content); break; + case MSR_AMD_OSVW_ID_LENGTH: + case MSR_AMD_OSVW_STATUS: + ret = svm_handle_osvw(v, msr, msr_content, 1); + if (ret < 0) + goto gpf; + break; + default: ret = nsvm_rdmsr(v, msr, msr_content); if ( ret < 0 ) @@ -1509,6 
+1537,13 @@ static int svm_msr_write_intercept(unsig */ break; + case MSR_AMD_OSVW_ID_LENGTH: + case MSR_AMD_OSVW_STATUS: + ret = svm_handle_osvw(v, msr, &msr_content, 0); + if (ret < 0) + goto gpf; + break; + default: ret = nsvm_wrmsr(v, msr, msr_content); if ( ret < 0 ) diff -r 5b2676ac1321 -r f157d40df95a xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/arch/x86/traps.c Mon Jan 16 22:08:09 2012 +0100 @@ -71,6 +71,7 @@ #include <asm/apic.h> #include <asm/mc146818rtc.h> #include <asm/hpet.h> +#include <asm/amd.h> #include <public/arch-x86/cpuid.h> #include <xsm/xsm.h> @@ -889,7 +890,6 @@ static void pv_cpuid(struct cpu_user_reg __clear_bit(X86_FEATURE_SVM % 32, &c); if ( !cpu_has_apic ) __clear_bit(X86_FEATURE_EXTAPIC % 32, &c); - __clear_bit(X86_FEATURE_OSVW % 32, &c); __clear_bit(X86_FEATURE_IBS % 32, &c); __clear_bit(X86_FEATURE_SKINIT % 32, &c); __clear_bit(X86_FEATURE_WDT % 32, &c); @@ -2542,6 +2542,15 @@ static int emulate_privileged_op(struct if ( wrmsr_safe(regs->ecx, msr_content) != 0 ) goto fail; break; + case MSR_AMD_OSVW_ID_LENGTH: + case MSR_AMD_OSVW_STATUS: + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) { + if (!boot_cpu_has(X86_FEATURE_OSVW)) + goto fail; + else + break; /* Writes are ignored */ + } + /* Fall through to default case */ default: if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) ) break; @@ -2573,6 +2582,7 @@ static int emulate_privileged_op(struct break; case 0x32: /* RDMSR */ + switch ( (u32)regs->ecx ) { #ifdef CONFIG_X86_64 @@ -2632,6 +2642,23 @@ static int emulate_privileged_op(struct regs->eax = (uint32_t)msr_content; regs->edx = (uint32_t)(msr_content >> 32); break; + case MSR_AMD_OSVW_ID_LENGTH: + case MSR_AMD_OSVW_STATUS: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (!boot_cpu_has(X86_FEATURE_OSVW)) + goto fail; + else { + if ((u32)regs->ecx == MSR_AMD_OSVW_ID_LENGTH) + msr_content = v->arch.amd.osvw.length; + else + msr_content = v->arch.amd.osvw.status; + + regs->eax 
= (uint32_t)msr_content; + regs->edx = (uint32_t)(msr_content >> 32); + } + } else + goto rdmsr_normal; + break; default: if ( rdmsr_hypervisor_regs(regs->ecx, &val) ) { diff -r 5b2676ac1321 -r f157d40df95a xen/include/asm-x86/amd.h --- a/xen/include/asm-x86/amd.h Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/include/asm-x86/amd.h Mon Jan 16 22:08:09 2012 +0100 @@ -140,7 +140,17 @@ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf)) +struct vcpu_amd { + + /* OSVW MSRs */ + struct { + u64 length; + u64 status; + } osvw; +}; + struct cpuinfo_x86; +void amd_vcpu_initialise(struct vcpu *); int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); #ifdef __x86_64__ diff -r 5b2676ac1321 -r f157d40df95a xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/include/asm-x86/domain.h Mon Jan 16 22:08:09 2012 +0100 @@ -8,6 +8,7 @@ #include <asm/hvm/domain.h> #include <asm/e820.h> #include <asm/mce.h> +#include <asm/amd.h> #include <public/vcpu.h> #define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo) @@ -495,6 +496,11 @@ struct arch_vcpu uint32_t gdbsx_vcpu_event; + /* Vendor-specific data */ + union { + struct vcpu_amd amd; + }; + /* A secondary copy of the vcpu time info. */ XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
Jan Beulich
2012-Jan-17 08:26 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 16.01.12 at 22:11, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote: > --- a/tools/libxc/xc_cpuid_x86.c Mon Jan 09 16:01:44 2012 +0100 > +++ b/tools/libxc/xc_cpuid_x86.c Mon Jan 16 22:08:09 2012 +0100 > @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy( > bitmaskof(X86_FEATURE_SSE4A) | > bitmaskof(X86_FEATURE_MISALIGNSSE) | > bitmaskof(X86_FEATURE_3DNOWPREFETCH) | > + bitmaskof(X86_FEATURE_OSVW) |Indentation. Also, is this really meaningful to PV guests? And valid for pre-Fam10?> bitmaskof(X86_FEATURE_XOP) | > bitmaskof(X86_FEATURE_FMA4) | > bitmaskof(X86_FEATURE_TBM) | > --- a/xen/arch/x86/cpu/amd.c Mon Jan 09 16:01:44 2012 +0100 > +++ b/xen/arch/x86/cpu/amd.c Mon Jan 16 22:08:09 2012 +0100 > @@ -32,6 +32,13 @@ > static char opt_famrev[14]; > string_param("cpuid_mask_cpu", opt_famrev); > > +/* > + * Set osvw_len to higher number when updated Revision Guides > + * are published and we know what the new status bits are > + */This is ugly, as it requires permanent attention. The value to start with should really be 64 (beyond which other code adjustments are going to be necessary anyway).> +static uint64_t osvw_length = 4, osvw_status; > +static DEFINE_SPINLOCK(amd_lock); > + > static inline void wrmsr_amd(unsigned int index, unsigned int lo, > unsigned int hi) > { > @@ -182,6 +189,35 @@ static void __devinit set_cpuidmask(cons > } > } > > +static void amd_guest_osvw_init(struct vcpu *vcpu) > +{ > + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) > + return;Shouldn''t we bail here for pre-Fam10 CPUs too? Further, as asked above already - is all this really meaningful to PV guests? Otherwise the code would better go somewhere under xen/arch/x86/hvm/svm/ ... 
Also, throughout this file indentation needs to be changed to Linux style (tabs instead of spaces), unless you want to contribute a patch to convert the whole file to Xen style (in which case you''d still need to make adjustments here).> + > + /* > + * Guests should see errata 400 and 415 as fixed (assuming that > + * HLT and IO instructions are intercepted). > + */ > + vcpu->arch.amd.osvw.length = (osvw_length >= 3) ? (osvw_length) : 3;An expression consisting of just an identifier for sure doesn''t need parentheses.> + vcpu->arch.amd.osvw.status = osvw_status & ~(6ULL); > + > + /* > + * By increasing VCPU''s osvw.length to 3 we are telling the guest that > + * all osvw.status bits inside that length, including bit 0 (which is > + * reserved for erratum 298), are valid. However, if host processor''s > + * osvw_len is 0 then osvw_status[0] carries no information. We need to > + * be conservative here and therefore we tell the guest that erratum 298 > + * is present (because we really don''t know). > + */ > + if (osvw_length == 0 && boot_cpu_data.x86 == 0x10)Why do you check the family here? If osvw_length can only ever be zero on Fam10, then the first check is sufficient. If osvw_length can be zero on other than Fam10 (no matter whether we bail early older CPUs), then the check is actually wrong.> + vcpu->arch.amd.osvw.status |= 1; > +} > + > +void amd_vcpu_initialise(struct vcpu *vcpu) > +{ > + amd_guest_osvw_init(vcpu); > +} > + > /* > * Check for the presence of an AMD erratum. Arguments are defined in amd.h > > * for each known erratum. Return 1 if erratum is found. > @@ -512,6 +548,30 @@ static void __devinit init_amd(struct cp > set_cpuidmask(c); > > check_syscfg_dram_mod_en(); > + > + /* > + * Get OSVW bits. If bits are not the same on different processors then > + * choose the worst case (i.e. if erratum is present on one processor and > + * not on another assume that the erratum is present everywhere). 
> + */ > + if (test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability)) { > + uint64_t len, status; > + > + if (rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) || > + rdmsr_safe(MSR_AMD_OSVW_STATUS, status)) > + len = status = 0; > + > + spin_lock(&amd_lock);What is the locking here supposed to protect against? The AP call tree is smp_callin() -> identify_cpu() -> init_amd(), which cannot race with anything (as the initiating processor is waiting for cpu_state to change to CPU_STATE_CALLIN).> + > + if (len < osvw_length) > + osvw_length = len; > + > + osvw_status |= status; > + osvw_status &= (1ULL << osvw_length) - 1; > + > + spin_unlock(&amd_lock); > + } else > + osvw_length = osvw_status = 0; > } > > static struct cpu_dev amd_cpu_dev __cpuinitdata = { > --- a/xen/arch/x86/domain.c Mon Jan 09 16:01:44 2012 +0100 > +++ b/xen/arch/x86/domain.c Mon Jan 16 22:08:09 2012 +0100 > @@ -422,6 +422,10 @@ int vcpu_initialise(struct vcpu *v) > if ( (rc = vcpu_init_fpu(v)) != 0 ) > return rc; > > + /* Vendor-specific initialization */ > + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)If you check the vendor here, you don''t need to anywhere in the descendant functions.> + amd_vcpu_initialise(v); > + > if ( is_hvm_domain(d) ) > { > rc = hvm_vcpu_initialise(v); > --- a/xen/arch/x86/hvm/svm/svm.c Mon Jan 09 16:01:44 2012 +0100 > +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jan 16 22:08:09 2012 +0100 > @@ -1044,6 +1044,27 @@ static void svm_init_erratum_383(struct > } > } > > +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, uint read) > +{ > + uint eax, ebx, ecx, edx; > + > + /* Guest OSVW support */ > + hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx); > + if (!test_bit((X86_FEATURE_OSVW & 31), &ecx))Formatting of changes to this file should be changed to Xen style.> + return -1; > + > + if (read) { > + if (msr == MSR_AMD_OSVW_ID_LENGTH) > + *val = v->arch.amd.osvw.length; > + else > + *val = v->arch.amd.osvw.status; > + } > + /* Writes are ignored */ > + > + return 0; > 
+} > + > + > static int svm_cpu_up(void) > { > uint64_t msr_content; > --- a/xen/arch/x86/traps.c Mon Jan 09 16:01:44 2012 +0100 > +++ b/xen/arch/x86/traps.c Mon Jan 16 22:08:09 2012 +0100 > @@ -2542,6 +2542,15 @@ static int emulate_privileged_op(struct > if ( wrmsr_safe(regs->ecx, msr_content) != 0 ) > goto fail; > break; > + case MSR_AMD_OSVW_ID_LENGTH: > + case MSR_AMD_OSVW_STATUS: > + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) { > + if (!boot_cpu_has(X86_FEATURE_OSVW)) > + goto fail; > + elseBogus "else" after a "goto". And I wonder, provided there is some point in doing all this for PV guests in the first place, whether this shouldn''t be kept getting handled by the default case.> + break; /* Writes are ignored */ > + } > + /* Fall through to default case */ > default: > if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) ) > break; > @@ -2573,6 +2582,7 @@ static int emulate_privileged_op(struct > break; > > case 0x32: /* RDMSR */ > +Stray addition of a newline.> switch ( (u32)regs->ecx ) > { > #ifdef CONFIG_X86_64 > @@ -2632,6 +2642,23 @@ static int emulate_privileged_op(struct > regs->eax = (uint32_t)msr_content; > regs->edx = (uint32_t)(msr_content >> 32); > break; > + case MSR_AMD_OSVW_ID_LENGTH: > + case MSR_AMD_OSVW_STATUS: > + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { > + if (!boot_cpu_has(X86_FEATURE_OSVW)) > + goto fail; > + else {Bogus "else" after a "goto". Jan> + if ((u32)regs->ecx == MSR_AMD_OSVW_ID_LENGTH) > + msr_content = v->arch.amd.osvw.length; > + else > + msr_content = v->arch.amd.osvw.status; > + > + regs->eax = (uint32_t)msr_content; > + regs->edx = (uint32_t)(msr_content >> 32); > + } > + } else > + goto rdmsr_normal; > + break; > default: > if ( rdmsr_hypervisor_regs(regs->ecx, &val) ) > {
Boris Ostrovsky
2012-Jan-17 17:54 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
Responses inline. On 01/17/12 03:26, Jan Beulich wrote:>>>> On 16.01.12 at 22:11, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >> --- a/tools/libxc/xc_cpuid_x86.c Mon Jan 09 16:01:44 2012 +0100 >> +++ b/tools/libxc/xc_cpuid_x86.c Mon Jan 16 22:08:09 2012 +0100 >> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy( >> bitmaskof(X86_FEATURE_SSE4A) | >> bitmaskof(X86_FEATURE_MISALIGNSSE) | >> bitmaskof(X86_FEATURE_3DNOWPREFETCH) | >> + bitmaskof(X86_FEATURE_OSVW) | > > Indentation. > > Also, is this really meaningful to PV guests?Does amd_xc_cpuid_policy() get called for PV guests? I thought this is HVM path only. xc_cpuid_pv_policy() is where clear_bit(X86_FEATURE_OSVW, regs[2]) was removed to handle PV. I believe this (i.e. OSVW changes) is meaningful for PV. Taking erratum 400 as an example -- we don''t need a Linux PV guest reading an MSR before going to idle (in amd_e400_idle()).> And valid for pre-Fam10?No, it indeed shouldn''t be set in those cases. I could set the bit conditionally, based on host family but the trouble is I don''t necessarily know what family the guest will be. Or is this known at this point?> >> bitmaskof(X86_FEATURE_XOP) | >> bitmaskof(X86_FEATURE_FMA4) | >> bitmaskof(X86_FEATURE_TBM) | >> --- a/xen/arch/x86/cpu/amd.c Mon Jan 09 16:01:44 2012 +0100 >> +++ b/xen/arch/x86/cpu/amd.c Mon Jan 16 22:08:09 2012 +0100 >> @@ -32,6 +32,13 @@ >> static char opt_famrev[14]; >> string_param("cpuid_mask_cpu", opt_famrev); >> >> +/* >> + * Set osvw_len to higher number when updated Revision Guides >> + * are published and we know what the new status bits are >> + */ > > This is ugly, as it requires permanent attention. The value to start > with should really be 64 (beyond which other code adjustments are > going to be necessary anyway).I went back and forth on this. 
The reason I settled on 4 is because we obviously don''t know what errata the higher bits will cover and because it is (at least theoretically) possible that a guest shouldn''t see the same status bits as the host. But maybe code maintenance is more burden than it''s worth?> >> +static uint64_t osvw_length = 4, osvw_status; >> +static DEFINE_SPINLOCK(amd_lock); >> + >> static inline void wrmsr_amd(unsigned int index, unsigned int lo, >> unsigned int hi) >> { >> @@ -182,6 +189,35 @@ static void __devinit set_cpuidmask(cons >> } >> } >> >> +static void amd_guest_osvw_init(struct vcpu *vcpu) >> +{ >> + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) >> + return; > > Shouldn''t we bail here for pre-Fam10 CPUs too?As you noted below the vendor check should be removed anyway since this is being called from under such a check already. As for family --- I think I need to solve how X86_FEATURE_OSVW is set and that will take care of whether anything that gets initialized in this routine is ever used.> > Further, as asked above already - is all this really meaningful to PV > guests? Otherwise the code would better go somewhere under > xen/arch/x86/hvm/svm/ ... > > Also, throughout this file indentation needs to be changed to Linux > style (tabs instead of spaces), unless you want to contribute a patch > to convert the whole file to Xen style (in which case you''d still need > to make adjustments here). > >> + >> + /* >> + * Guests should see errata 400 and 415 as fixed (assuming that >> + * HLT and IO instructions are intercepted). >> + */ >> + vcpu->arch.amd.osvw.length = (osvw_length>= 3) ? (osvw_length) : 3; > > An expression consisting of just an identifier for sure doesn''t need > parentheses. > >> + vcpu->arch.amd.osvw.status = osvw_status& ~(6ULL); >> + >> + /* >> + * By increasing VCPU''s osvw.length to 3 we are telling the guest that >> + * all osvw.status bits inside that length, including bit 0 (which is >> + * reserved for erratum 298), are valid. 
However, if host processor''s >> + * osvw_len is 0 then osvw_status[0] carries no information. We need to >> + * be conservative here and therefore we tell the guest that erratum 298 >> + * is present (because we really don''t know). >> + */ >> + if (osvw_length == 0&& boot_cpu_data.x86 == 0x10) > > Why do you check the family here? If osvw_length can only ever be > zero on Fam10, then the first check is sufficient. If osvw_length can > be zero on other than Fam10 (no matter whether we bail early older > CPUs), then the check is actually wrong.10h is the only family affected by this erratum.> >> + vcpu->arch.amd.osvw.status |= 1; >> +} >> + >> +void amd_vcpu_initialise(struct vcpu *vcpu) >> +{ >> + amd_guest_osvw_init(vcpu); >> +} >> + >> /* >> * Check for the presence of an AMD erratum. Arguments are defined in amd.h >> >> * for each known erratum. Return 1 if erratum is found. >> @@ -512,6 +548,30 @@ static void __devinit init_amd(struct cp >> set_cpuidmask(c); >> >> check_syscfg_dram_mod_en(); >> + >> + /* >> + * Get OSVW bits. If bits are not the same on different processors then >> + * choose the worst case (i.e. if erratum is present on one processor and >> + * not on another assume that the erratum is present everywhere). >> + */ >> + if (test_bit(X86_FEATURE_OSVW,&boot_cpu_data.x86_capability)) { >> + uint64_t len, status; >> + >> + if (rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) || >> + rdmsr_safe(MSR_AMD_OSVW_STATUS, status)) >> + len = status = 0; >> + >> + spin_lock(&amd_lock); > > What is the locking here supposed to protect against? The AP call tree > is smp_callin() -> identify_cpu() -> init_amd(), which cannot race with > anything (as the initiating processor is waiting for cpu_state to change > to CPU_STATE_CALLIN). 
> >> + >> + if (len< osvw_length) >> + osvw_length = len; >> + >> + osvw_status |= status; >> + osvw_status&= (1ULL<< osvw_length) - 1; >> + >> + spin_unlock(&amd_lock); >> + } else >> + osvw_length = osvw_status = 0; >> } >> >> static struct cpu_dev amd_cpu_dev __cpuinitdata = { >> --- a/xen/arch/x86/domain.c Mon Jan 09 16:01:44 2012 +0100 >> +++ b/xen/arch/x86/domain.c Mon Jan 16 22:08:09 2012 +0100 >> @@ -422,6 +422,10 @@ int vcpu_initialise(struct vcpu *v) >> if ( (rc = vcpu_init_fpu(v)) != 0 ) >> return rc; >> >> + /* Vendor-specific initialization */ >> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) > > If you check the vendor here, you don''t need to anywhere in the > descendant functions. > >> + amd_vcpu_initialise(v); >> + >> if ( is_hvm_domain(d) ) >> { >> rc = hvm_vcpu_initialise(v); >> --- a/xen/arch/x86/hvm/svm/svm.c Mon Jan 09 16:01:44 2012 +0100 >> +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jan 16 22:08:09 2012 +0100 >> @@ -1044,6 +1044,27 @@ static void svm_init_erratum_383(struct >> } >> } >> >> +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, uint read) >> +{ >> + uint eax, ebx, ecx, edx; >> + >> + /* Guest OSVW support */ >> + hvm_cpuid(0x80000001,&eax,&ebx,&ecx,&edx); >> + if (!test_bit((X86_FEATURE_OSVW& 31),&ecx)) > > Formatting of changes to this file should be changed to Xen style. 
> >> + return -1; >> + >> + if (read) { >> + if (msr == MSR_AMD_OSVW_ID_LENGTH) >> + *val = v->arch.amd.osvw.length; >> + else >> + *val = v->arch.amd.osvw.status; >> + } >> + /* Writes are ignored */ >> + >> + return 0; >> +} >> + >> + >> static int svm_cpu_up(void) >> { >> uint64_t msr_content; >> --- a/xen/arch/x86/traps.c Mon Jan 09 16:01:44 2012 +0100 >> +++ b/xen/arch/x86/traps.c Mon Jan 16 22:08:09 2012 +0100 >> @@ -2542,6 +2542,15 @@ static int emulate_privileged_op(struct >> if ( wrmsr_safe(regs->ecx, msr_content) != 0 ) >> goto fail; >> break; >> + case MSR_AMD_OSVW_ID_LENGTH: >> + case MSR_AMD_OSVW_STATUS: >> + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) { >> + if (!boot_cpu_has(X86_FEATURE_OSVW)) >> + goto fail; >> + else > > Bogus "else" after a "goto". And I wonder, provided there is some > point in doing all this for PV guests in the first place, whether this > shouldn''t be kept getting handled by the default case.If we go into default case we will emit "Domain attempted WRMSR ..." message in the log. I was trying to avoid that since writing to these registers is not really an error, it just gets ignored. I''ll fix the rest. Thanks for review. -boris> >> + break; /* Writes are ignored */ >> + } >> + /* Fall through to default case */ >> default: >> if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) ) >> break; >> @@ -2573,6 +2582,7 @@ static int emulate_privileged_op(struct >> break; >> >> case 0x32: /* RDMSR */ >> + > > Stray addition of a newline. > >> switch ( (u32)regs->ecx ) >> { >> #ifdef CONFIG_X86_64 >> @@ -2632,6 +2642,23 @@ static int emulate_privileged_op(struct >> regs->eax = (uint32_t)msr_content; >> regs->edx = (uint32_t)(msr_content>> 32); >> break; >> + case MSR_AMD_OSVW_ID_LENGTH: >> + case MSR_AMD_OSVW_STATUS: >> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { >> + if (!boot_cpu_has(X86_FEATURE_OSVW)) >> + goto fail; >> + else { > > Bogus "else" after a "goto". 
> > Jan > >> + if ((u32)regs->ecx == MSR_AMD_OSVW_ID_LENGTH) >> + msr_content = v->arch.amd.osvw.length; >> + else >> + msr_content = v->arch.amd.osvw.status; >> + >> + regs->eax = (uint32_t)msr_content; >> + regs->edx = (uint32_t)(msr_content>> 32); >> + } >> + } else >> + goto rdmsr_normal; >> + break; >> default: >> if ( rdmsr_hypervisor_regs(regs->ecx,&val) ) >> { > >
Jan Beulich
2012-Jan-18 09:50 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 17.01.12 at 18:54, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote: > On 01/17/12 03:26, Jan Beulich wrote: >>>>> On 16.01.12 at 22:11, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >>> --- a/tools/libxc/xc_cpuid_x86.c Mon Jan 09 16:01:44 2012 +0100 >>> +++ b/tools/libxc/xc_cpuid_x86.c Mon Jan 16 22:08:09 2012 +0100 >>> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy( >>> bitmaskof(X86_FEATURE_SSE4A) | >>> bitmaskof(X86_FEATURE_MISALIGNSSE) | >>> bitmaskof(X86_FEATURE_3DNOWPREFETCH) | >>> + bitmaskof(X86_FEATURE_OSVW) | >> >> Indentation. >> >> Also, is this really meaningful to PV guests? > > Does amd_xc_cpuid_policy() get called for PV guests? I thought this is > HVM path only.In that case I simply misplaced the comment.> xc_cpuid_pv_policy() is where clear_bit(X86_FEATURE_OSVW, regs[2]) was > removed to handle PV. > > I believe this (i.e. OSVW changes) is meaningful for PV. Taking erratum > 400 as an example -- we don''t need a Linux PV guest reading an MSR > before going to idle (in amd_e400_idle()).It is bogus in the first place if a pv guest does so - after all, not doing stuff like this is the nature of being pv.>> And valid for pre-Fam10? > > No, it indeed shouldn''t be set in those cases. > > I could set the bit conditionally, based on host family but the trouble > is I don''t necessarily know what family the guest will be. Or is this > known at this point?I think basing this on the host family is a good first shot. I would hope others could comment on how to properly deal with a dependency like this...>>> --- a/xen/arch/x86/cpu/amd.c Mon Jan 09 16:01:44 2012 +0100 >>> +++ b/xen/arch/x86/cpu/amd.c Mon Jan 16 22:08:09 2012 +0100 >>> @@ -32,6 +32,13 @@ >>> static char opt_famrev[14]; >>> string_param("cpuid_mask_cpu", opt_famrev); >>> >>> +/* >>> + * Set osvw_len to higher number when updated Revision Guides >>> + * are published and we know what the new status bits are >>> + */ >> >> This is ugly, as it requires permanent attention. 
The value to start >> with should really be 64 (beyond which other code adjustments are >> going to be necessary anyway). > > I went back and forth on this. The reason I settled on 4 is because we > obviously don''t know what errata the higher bits will cover and because > it is (at least theoretically) possible that a guest shouldn''t see the > same status bits as the host.I think allowing the guest to know of (not) worked around errata is even desirable (aiui in the worst case it''ll apply a redundant fix).>>> + vcpu->arch.amd.osvw.status = osvw_status& ~(6ULL); >>> + >>> + /* >>> + * By increasing VCPU''s osvw.length to 3 we are telling the guest that >>> + * all osvw.status bits inside that length, including bit 0 (which is >>> + * reserved for erratum 298), are valid. However, if host processor''s >>> + * osvw_len is 0 then osvw_status[0] carries no information. We need to >>> + * be conservative here and therefore we tell the guest that erratum > 298 >>> + * is present (because we really don''t know). >>> + */ >>> + if (osvw_length == 0&& boot_cpu_data.x86 == 0x10) >> >> Why do you check the family here? If osvw_length can only ever be >> zero on Fam10, then the first check is sufficient. If osvw_length can >> be zero on other than Fam10 (no matter whether we bail early older >> CPUs), then the check is actually wrong. > > 10h is the only family affected by this erratum.What is "this erratum" here? 
My comment was to suggest that either of the two conditions can likely be eliminated for being redundant.>>> --- a/xen/arch/x86/traps.c Mon Jan 09 16:01:44 2012 +0100 >>> +++ b/xen/arch/x86/traps.c Mon Jan 16 22:08:09 2012 +0100 >>> @@ -2542,6 +2542,15 @@ static int emulate_privileged_op(struct >>> if ( wrmsr_safe(regs->ecx, msr_content) != 0 ) >>> goto fail; >>> break; >>> + case MSR_AMD_OSVW_ID_LENGTH: >>> + case MSR_AMD_OSVW_STATUS: >>> + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) { >>> + if (!boot_cpu_has(X86_FEATURE_OSVW)) >>> + goto fail; >>> + else >> >> Bogus "else" after a "goto". And I wonder, provided there is some >> point in doing all this for PV guests in the first place, whether this >> shouldn''t be kept getting handled by the default case. > > > If we go into default case we will emit "Domain attempted WRMSR ..." > message in the log. I was trying to avoid that since writing to these > registers is not really an error, it just gets ignored.The documentation says that both MSRs are read/write, so issuing a message here appears to be The Right Thing (tm). Kernels shouldn''t be doing this in general, and for PV kernels imo it is definitely a bug. Jan
Boris Ostrovsky
2012-Jan-18 18:26 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
On 01/18/12 04:50, Jan Beulich wrote:>>>> On 17.01.12 at 18:54, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >> On 01/17/12 03:26, Jan Beulich wrote: >>>>>> On 16.01.12 at 22:11, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >>>> --- a/tools/libxc/xc_cpuid_x86.c Mon Jan 09 16:01:44 2012 +0100 >>>> +++ b/tools/libxc/xc_cpuid_x86.c Mon Jan 16 22:08:09 2012 +0100 >>>> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy( >>>> bitmaskof(X86_FEATURE_SSE4A) | >>>> bitmaskof(X86_FEATURE_MISALIGNSSE) | >>>> bitmaskof(X86_FEATURE_3DNOWPREFETCH) | >>>> + bitmaskof(X86_FEATURE_OSVW) | >>> >>> Indentation. >>> >>> Also, is this really meaningful to PV guests? >> >> Does amd_xc_cpuid_policy() get called for PV guests? I thought this is >> HVM path only. > > In that case I simply misplaced the comment. > >> xc_cpuid_pv_policy() is where clear_bit(X86_FEATURE_OSVW, regs[2]) was >> removed to handle PV. >> >> I believe this (i.e. OSVW changes) is meaningful for PV. Taking erratum >> 400 as an example -- we don''t need a Linux PV guest reading an MSR >> before going to idle (in amd_e400_idle()). > > It is bogus in the first place if a pv guest does so - after all, not doing > stuff like this is the nature of being pv.It was actually a bad example --- the guest is not using amd_e400_idle() and so there are no extra MSR accesses. However, without this change OSVW bit will not show up in the guest''s CPUID and I think guests should be able to see it. One could argue whether or not we should mask off status bits for the two errata (400 and 415) since they are not currently used; I''d mask them off just to be consistent with HVM, it won''t affect anything.>>>> + vcpu->arch.amd.osvw.length = (osvw_length >= 3) ? 
(osvw_length) : 3 >>>> + vcpu->arch.amd.osvw.status = osvw_status& ~(6ULL); >>>> + >>>> + /* >>>> + * By increasing VCPU''s osvw.length to 3 we are telling the guest that >>>> + * all osvw.status bits inside that length, including bit 0 (which is >>>> + * reserved for erratum 298), are valid. However, if host processor''s >>>> + * osvw_len is 0 then osvw_status[0] carries no information. We need to >>>> + * be conservative here and therefore we tell the guest that erratum >> 298 >>>> + * is present (because we really don''t know). >>>> + */ >>>> + if (osvw_length == 0&& boot_cpu_data.x86 == 0x10) >>> >>> Why do you check the family here? If osvw_length can only ever be >>> zero on Fam10, then the first check is sufficient. If osvw_length can >>> be zero on other than Fam10 (no matter whether we bail early older >>> CPUs), then the check is actually wrong. >> >> 10h is the only family affected by this erratum. > > What is "this erratum" here? My comment was to suggest that either > of the two conditions can likely be eliminated for being redundant.("This erratum" is erratum 298, which is bit 0) If osvw_length!=0 then we don''t need to do anything since bit 0 is already valid. If osvw_length==0 then -- since we just made the guest''s version of this register 3 and thus turned bit 0 from being invalid to valid -- we need to do something about bit 0. But the bit can only be set on family 10h, thus both checks. Thanks. -boris
Jan Beulich
2012-Jan-19 08:10 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 18.01.12 at 19:26, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote: > On 01/18/12 04:50, Jan Beulich wrote: >>>>> On 17.01.12 at 18:54, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >>> I believe this (i.e. OSVW changes) is meaningful for PV. Taking erratum >>> 400 as an example -- we don''t need a Linux PV guest reading an MSR >>> before going to idle (in amd_e400_idle()). >> >> It is bogus in the first place if a pv guest does so - after all, not doing >> stuff like this is the nature of being pv. > > It was actually a bad example --- the guest is not using amd_e400_idle() > and so there are no extra MSR accesses. > > However, without this change OSVW bit will not show up in the guest''s > CPUID and I think guests should be able to see it. One could argue > whether or not we should mask off status bits for the two errata (400 > and 415) since they are not currently used; I''d mask them off just to be > consistent with HVM, it won''t affect anything.I continue to think otherwise - knowing of and dealing with this is supposed to be entirely hidden from PV guests, unless and until you can provide a counter example. Therefore I am likely to nak this part of future revisions of the patch (which Keir could certainly override), up to and including ripping out the PV part (and adjusting the rest accordingly) if I would go for committing it.>>>>> + if (osvw_length == 0&& boot_cpu_data.x86 == 0x10) >>>> >>>> Why do you check the family here? If osvw_length can only ever be >>>> zero on Fam10, then the first check is sufficient. If osvw_length can >>>> be zero on other than Fam10 (no matter whether we bail early older >>>> CPUs), then the check is actually wrong. >>> >>> 10h is the only family affected by this erratum. >> >> What is "this erratum" here? My comment was to suggest that either >> of the two conditions can likely be eliminated for being redundant. 
> > ("This erratum" is erratum 298, which is bit 0) > > If osvw_length!=0 then we don''t need to do anything since bit 0 is > already valid. > > If osvw_length==0 then -- since we just made the guest''s version of this > register 3 and thus turned bit 0 from being invalid to valid -- we need > to do something about bit 0. But the bit can only be set on family 10h, > thus both checks.Okay, got you. However, the whole thing will become superfluous anyway if you bail on family less than 0x10 right at the start of the function. Btw., one more comment on the change to init_amd(): You will likely need to distinguish BP and AP cases here - on the BP you want to plainly write the values read from the MSRs to the global variables, but on APs (with possibly different settings) you need to work towards a setting of the global variables that apply to all of the CPUs. This is not just for the (theoretical only?) hotplug case, but particularly the one of a soft-offlined CPU that had its microcode updated already in a way affecting the OSVW bits and is subsequently being brought back online. Which reminds you and me that the patch is missing integration with the microcode update loader (as that one may alter the OSVW bits iiuc). Jan
Keir Fraser
2012-Jan-19 08:46 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
On 19/01/2012 08:10, "Jan Beulich" <JBeulich@suse.com> wrote:>>>> On 18.01.12 at 19:26, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote: >> On 01/18/12 04:50, Jan Beulich wrote: >>>>>> On 17.01.12 at 18:54, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >>>> I believe this (i.e. OSVW changes) is meaningful for PV. Taking erratum >>>> 400 as an example -- we don''t need a Linux PV guest reading an MSR >>>> before going to idle (in amd_e400_idle()). >>> >>> It is bogus in the first place if a pv guest does so - after all, not doing >>> stuff like this is the nature of being pv. >> >> It was actually a bad example --- the guest is not using amd_e400_idle() >> and so there are no extra MSR accesses. >> >> However, without this change OSVW bit will not show up in the guest''s >> CPUID and I think guests should be able to see it. One could argue >> whether or not we should mask off status bits for the two errata (400 >> and 415) since they are not currently used; I''d mask them off just to be >> consistent with HVM, it won''t affect anything. > > I continue to think otherwise - knowing of and dealing with this is > supposed to be entirely hidden from PV guests, unless and until > you can provide a counter example. Therefore I am likely to nak > this part of future revisions of the patch (which Keir could > certainly override), up to and including ripping out the PV part (and > adjusting the rest accordingly) if I would go for committing it.Well, the general principle of exposing OSVW to PV guests doesn''t seem terrible. It''s just the current specific motivation for exposing to HVM guests does not apply to PV guests. Are *any* of the current OSVW defined bits at all useful or applicable to PV guests? -- Keir>>>>>> + if (osvw_length == 0&& boot_cpu_data.x86 == 0x10) >>>>> >>>>> Why do you check the family here? If osvw_length can only ever be >>>>> zero on Fam10, then the first check is sufficient. 
If osvw_length can >>>>> be zero on other than Fam10 (no matter whether we bail early older >>>>> CPUs), then the check is actually wrong. >>>> >>>> 10h is the only family affected by this erratum. >>> >>> What is "this erratum" here? My comment was to suggest that either >>> of the two conditions can likely be eliminated for being redundant. >> >> ("This erratum" is erratum 298, which is bit 0) >> >> If osvw_length!=0 then we don''t need to do anything since bit 0 is >> already valid. >> >> If osvw_length==0 then -- since we just made the guest''s version of this >> register 3 and thus turned bit 0 from being invalid to valid -- we need >> to do something about bit 0. But the bit can only be set on family 10h, >> thus both checks. > > Okay, got you. However, the whole thing will become superfluous > anyway if you bail on family less than 0x10 right at the start of the > function. > > Btw., one more comment on the change to init_amd(): You will likely > need to distinguish BP and AP cases here - on the BP you want to > plainly write the values read from the MSRs to the global variables, > but on APs (with possibly different settings) you need to work > towards a setting of the global variables that apply to all of the > CPUs. This is not just for the (theoretical only?) hotplug case, but > particularly the one of a soft-offlined CPU that had its microcode > updated already in a way affecting the OSVW bits and is > subsequently being brought back online. > > Which reminds you and me that the patch is missing integration with > the microcode update loader (as that one may alter the OSVW bits > iiuc). > > Jan > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel
Boris Ostrovsky
2012-Jan-19 15:22 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
On 01/19/12 03:10, Jan Beulich wrote:> > Btw., one more comment on the change to init_amd(): You will likely > need to distinguish BP and AP cases here - on the BP you want to > plainly write the values read from the MSRs to the global variables, > but on APs (with possibly different settings) you need to work > towards a setting of the global variables that apply to all of the > CPUs. This is not just for the (theoretical only?) hotplug case, but > particularly the one of a soft-offlined CPU that had its microcode > updated already in a way affecting the OSVW bits and is > subsequently being brought back online.Not sure I follow you. There is no difference in how BIOS/microcode sets OSVW bits on AP or BP, that''s based on silicon version (i.e. stepping) and not where that silicon is plugged in. If a core is offlined that will presumably happen after it has already gone through init_amd() and therefore global versions of the registers already accounted for that processor''s values. (The downside is that if the offlined processors has more bugs than the rest then everyone is considered as bad even though the bad processor is out of the action. But I think the positives of updating global variables after a cpu is offlined are not worth the complexity.)> > Which reminds you and me that the patch is missing integration with > the microcode update loader (as that one may alter the OSVW bits > iiuc).That''s a good point --- I should re-initialize the global variables after a patch is loaded. Thanks. -boris
Boris Ostrovsky
2012-Jan-19 15:57 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
On 01/19/12 03:46, Keir Fraser wrote:> On 19/01/2012 08:10, "Jan Beulich"<JBeulich@suse.com> wrote: > >>>>> On 18.01.12 at 19:26, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >>> On 01/18/12 04:50, Jan Beulich wrote: >>>>>>> On 17.01.12 at 18:54, Boris Ostrovsky<boris.ostrovsky@amd.com> wrote: >>>>> I believe this (i.e. OSVW changes) is meaningful for PV. Taking erratum >>>>> 400 as an example -- we don''t need a Linux PV guest reading an MSR >>>>> before going to idle (in amd_e400_idle()). >>>> >>>> It is bogus in the first place if a pv guest does so - after all, not doing >>>> stuff like this is the nature of being pv. >>> >>> It was actually a bad example --- the guest is not using amd_e400_idle() >>> and so there are no extra MSR accesses. >>> >>> However, without this change OSVW bit will not show up in the guest''s >>> CPUID and I think guests should be able to see it. One could argue >>> whether or not we should mask off status bits for the two errata (400 >>> and 415) since they are not currently used; I''d mask them off just to be >>> consistent with HVM, it won''t affect anything. >> >> I continue to think otherwise - knowing of and dealing with this is >> supposed to be entirely hidden from PV guests, unless and until >> you can provide a counter example. Therefore I am likely to nak >> this part of future revisions of the patch (which Keir could >> certainly override), up to and including ripping out the PV part (and >> adjusting the rest accordingly) if I would go for committing it. > > Well, the general principle of exposing OSVW to PV guests doesn''t seem > terrible. It''s just the current specific motivation for exposing to HVM > guests does not apply to PV guests. > > Are *any* of the current OSVW defined bits at all useful or applicable to PV > guests? >(Wrong "Reply" button) Probably not with current bits, at least for Linux PV guests. -boris
Jan Beulich
2012-Jan-19 16:29 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 19.01.12 at 16:22, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote: > On 01/19/12 03:10, Jan Beulich wrote: > >> >> Btw., one more comment on the change to init_amd(): You will likely >> need to distinguish BP and AP cases here - on the BP you want to >> plainly write the values read from the MSRs to the global variables, >> but on APs (with possibly different settings) you need to work >> towards a setting of the global variables that apply to all of the >> CPUs. This is not just for the (theoretical only?) hotplug case, but >> particularly the one of a soft-offlined CPU that had its microcode >> updated already in a way affecting the OSVW bits and is >> subsequently being brought back online. > > > Not sure I follow you. There is no difference in how BIOS/microcode sets > OSVW bits on AP or BP, that''s based on silicon version (i.e. stepping) > and not where that silicon is plugged in.But mixed stepping systems could have values in an AP that are different from what the BP (and earlier APs) had.> If a core is offlined that will presumably happen after it has already > gone through init_amd() and therefore global versions of the registers > already accounted for that processor''s values. (The downside is that if > the offlined processors has more bugs than the rest then everyone is > considered as bad even though the bad processor is out of the action. > But I think the positives of updating global variables after a cpu is > offlined are not worth the complexity.)Yes, optimizing for this case is very unlikely to be necessary. Jan
Boris Ostrovsky
2012-Jan-30 17:05 UTC
[PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
# HG changeset patch # User Boris Ostrovsky <boris.ostrovsky@amd.com> # Date 1327942850 -3600 # Node ID 1361d9895cb17aa8fce1054120311285144b8399 # Parent e2722b24dc0962de37215320b05d1bb7c4c42864 x86/AMD: Add support for AMD''s OSVW feature in guests. In some cases guests should not provide workarounds for errata even when the physical processor is affected. For example, because of erratum 400 on family 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before going to idle in order to avoid getting stuck in a non-C0 state. This is not necessary: HLT and IO instructions are intercepted and therefore there is no reason for erratum 400 workaround in the guest. This patch allows us to present a guest with certain errata as fixed, regardless of the state of actual hardware. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com> Acked-by: Christoph Egger <Christoph.Egger@amd.com> diff -r e2722b24dc09 -r 1361d9895cb1 tools/libxc/xc_cpuid_x86.c --- a/tools/libxc/xc_cpuid_x86.c Thu Jan 26 17:43:31 2012 +0000 +++ b/tools/libxc/xc_cpuid_x86.c Mon Jan 30 18:00:50 2012 +0100 @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy( bitmaskof(X86_FEATURE_SSE4A) | bitmaskof(X86_FEATURE_MISALIGNSSE) | bitmaskof(X86_FEATURE_3DNOWPREFETCH) | + bitmaskof(X86_FEATURE_OSVW) | bitmaskof(X86_FEATURE_XOP) | bitmaskof(X86_FEATURE_FMA4) | bitmaskof(X86_FEATURE_TBM) | diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/arch/x86/hvm/svm/svm.c Mon Jan 30 18:00:50 2012 +0100 @@ -83,6 +83,9 @@ static DEFINE_PER_CPU_READ_MOSTLY(void * static bool_t amd_erratum383_found __read_mostly; +/* OSVW bits */ +static uint64_t osvw_length, osvw_status; + void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len) { struct vcpu *curr = current; @@ -902,6 +905,60 @@ static void svm_do_resume(struct vcpu *v reset_stack_and_jump(svm_asm_do_resume); } +static void svm_guest_osvw_init(struct vcpu 
*vcpu) +{ + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) + return; + + /* + * Guests should see errata 400 and 415 as fixed (assuming that + * HLT and IO instructions are intercepted). + */ + vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3; + vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL); + + /* + * By increasing VCPU''s osvw.length to 3 we are telling the guest that + * all osvw.status bits inside that length, including bit 0 (which is + * reserved for erratum 298), are valid. However, if host processor''s + * osvw_len is 0 then osvw_status[0] carries no information. We need to + * be conservative here and therefore we tell the guest that erratum 298 + * is present (because we really don''t know). + */ + if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 ) + vcpu->arch.hvm_svm.osvw.status |= 1; +} + +void svm_host_osvw_reset() +{ + osvw_length = 64; /* One register (MSRC001_0141) worth of errata */ + osvw_status = 0; +} + +void svm_host_osvw_init() +{ + /* + * Get OSVW bits. If bits are not the same on different processors then + * choose the worst case (i.e. if erratum is present on one processor and + * not on another assume that the erratum is present everywhere). 
+ */ + if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) ) + { + uint64_t len, status; + + if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) || + rdmsr_safe(MSR_AMD_OSVW_STATUS, status) ) + len = status = 0; + + if (len < osvw_length) + osvw_length = len; + + osvw_status |= status; + osvw_status &= (1ULL << osvw_length) - 1; + } else + osvw_length = osvw_status = 0; +} + static int svm_domain_initialise(struct domain *d) { return 0; @@ -930,6 +987,9 @@ static int svm_vcpu_initialise(struct vc } vpmu_initialise(v); + + svm_guest_osvw_init(v); + return 0; } @@ -1044,6 +1104,27 @@ static void svm_init_erratum_383(struct } } +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read) +{ + uint eax, ebx, ecx, edx; + + /* Guest OSVW support */ + hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) ) + return -1; + + if ( read ) + { + if (msr == MSR_AMD_OSVW_ID_LENGTH) + *val = v->arch.hvm_svm.osvw.length; + else + *val = v->arch.hvm_svm.osvw.status; + } + /* Writes are ignored */ + + return 0; +} + static int svm_cpu_up(void) { uint64_t msr_content; @@ -1094,6 +1175,9 @@ static int svm_cpu_up(void) } #endif + /* Initialize OSVW bits to be used by guests */ + svm_host_osvw_init(); + return 0; } @@ -1104,6 +1188,8 @@ struct hvm_function_table * __init start if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) ) return NULL; + svm_host_osvw_reset(); + if ( svm_cpu_up() ) { printk("SVM: failed to initialise.\n"); @@ -1388,6 +1474,13 @@ static int svm_msr_read_intercept(unsign vpmu_do_rdmsr(msr, msr_content); break; + case MSR_AMD_OSVW_ID_LENGTH: + case MSR_AMD_OSVW_STATUS: + ret = svm_handle_osvw(v, msr, msr_content, 1); + if ( ret < 0 ) + goto gpf; + break; + default: ret = nsvm_rdmsr(v, msr, msr_content); if ( ret < 0 ) @@ -1512,6 +1605,13 @@ static int svm_msr_write_intercept(unsig */ break; + case MSR_AMD_OSVW_ID_LENGTH: + case MSR_AMD_OSVW_STATUS: + ret = svm_handle_osvw(v, msr, 
&msr_content, 0); + if ( ret < 0 ) + goto gpf; + break; + default: ret = nsvm_wrmsr(v, msr, msr_content); if ( ret < 0 ) diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/microcode.c --- a/xen/arch/x86/microcode.c Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/arch/x86/microcode.c Mon Jan 30 18:00:50 2012 +0100 @@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co info->error = 0; info->cpu = cpumask_first(&cpu_online_map); + if ( microcode_ops->start_update ) + { + ret = microcode_ops->start_update(); + if ( ret != 0 ) + { + xfree(info); + return ret; + } + } + return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info); } diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/microcode_amd.c --- a/xen/arch/x86/microcode_amd.c Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/arch/x86/microcode_amd.c Mon Jan 30 18:00:50 2012 +0100 @@ -25,6 +25,7 @@ #include <asm/msr.h> #include <asm/processor.h> #include <asm/microcode.h> +#include <asm/hvm/svm/svm.h> struct equiv_cpu_entry { uint32_t installed_cpu; @@ -287,7 +288,8 @@ static int cpu_request_microcode(int cpu { printk(KERN_ERR "microcode: error! Wrong " "microcode patch file magic\n"); - return -EINVAL; + error = -EINVAL; + goto out; } mc_amd = xmalloc(struct microcode_amd); @@ -295,7 +297,8 @@ static int cpu_request_microcode(int cpu { printk(KERN_ERR "microcode: error! " "Can not allocate memory for microcode patch\n"); - return -ENOMEM; + error = -ENOMEM; + goto out; } error = install_equiv_cpu_table(mc_amd, buf, &offset); @@ -303,7 +306,8 @@ static int cpu_request_microcode(int cpu { xfree(mc_amd); printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); - return -EINVAL; + error = -EINVAL; + goto out; } mc_old = uci->mc.mc_amd; @@ -337,13 +341,18 @@ static int cpu_request_microcode(int cpu /* On success keep the microcode patch for * re-apply on resume. 
*/ - if (error == 1) + if ( error == 1 ) { xfree(mc_old); - return 0; + error = 0; + } else + { + xfree(mc_amd); + uci->mc.mc_amd = mc_old; } - xfree(mc_amd); - uci->mc.mc_amd = mc_old; + + out: + svm_host_osvw_init(); return error; } @@ -395,11 +404,19 @@ err1: return -ENOMEM; } +static int start_update(void) +{ + svm_host_osvw_reset(); + + return 0; +} + static const struct microcode_ops microcode_amd_ops = { .microcode_resume_match = microcode_resume_match, .cpu_request_microcode = cpu_request_microcode, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, + .start_update = start_update, }; static __init int microcode_init_amd(void) diff -r e2722b24dc09 -r 1361d9895cb1 xen/arch/x86/platform_hypercall.c --- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/arch/x86/platform_hypercall.c Mon Jan 30 18:00:50 2012 +0100 @@ -166,7 +166,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe break; guest_from_compat_handle(data, op->u.microcode.data); + + while ( !domctl_lock_acquire() ) + if ( hypercall_preempt_check() ) + { + ret = hypercall_create_continuation( + __HYPERVISOR_platform_op, "h", u_xenpf_op); + goto out; + } + ret = microcode_update(data, op->u.microcode.length); + domctl_lock_release(); } break; diff -r e2722b24dc09 -r 1361d9895cb1 xen/include/asm-x86/hvm/svm/svm.h --- a/xen/include/asm-x86/hvm/svm/svm.h Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/include/asm-x86/hvm/svm/svm.h Mon Jan 30 18:00:50 2012 +0100 @@ -98,4 +98,7 @@ extern u32 svm_feature_flags; ~TSC_RATIO_RSVD_BITS ) #define vcpu_tsc_ratio(v) TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz) +extern void svm_host_osvw_reset(void); +extern void svm_host_osvw_init(void); + #endif /* __ASM_X86_HVM_SVM_H__ */ diff -r e2722b24dc09 -r 1361d9895cb1 xen/include/asm-x86/hvm/svm/vmcb.h --- a/xen/include/asm-x86/hvm/svm/vmcb.h Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jan 30 18:00:50 2012 +0100 @@ -516,6 +516,12 @@ struct arch_svm_struct 
{ /* AMD lightweight profiling MSR */ uint64_t guest_lwp_cfg; + + /* OSVW MSRs */ + struct { + u64 length; + u64 status; + } osvw; }; struct vmcb_struct *alloc_vmcb(void); diff -r e2722b24dc09 -r 1361d9895cb1 xen/include/asm-x86/microcode.h --- a/xen/include/asm-x86/microcode.h Thu Jan 26 17:43:31 2012 +0000 +++ b/xen/include/asm-x86/microcode.h Mon Jan 30 18:00:50 2012 +0100 @@ -11,6 +11,7 @@ struct microcode_ops { int (*cpu_request_microcode)(int cpu, const void *buf, size_t size); int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); int (*apply_microcode)(int cpu); + int (*start_update)(void); }; struct cpu_signature {
Jan Beulich
2012-Jan-31 10:13 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 30.01.12 at 18:05, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote: > --- a/xen/arch/x86/microcode.c Thu Jan 26 17:43:31 2012 +0000 > +++ b/xen/arch/x86/microcode.c Mon Jan 30 18:00:50 2012 +0100 > @@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co > info->error = 0; > info->cpu = cpumask_first(&cpu_online_map); > > + if ( microcode_ops->start_update ) > + { > + ret = microcode_ops->start_update(); > + if ( ret != 0 ) > + { > + xfree(info); > + return ret; > + }Indentation (no tabs).> + } > + > return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info); > } > > --- a/xen/arch/x86/microcode_amd.c Thu Jan 26 17:43:31 2012 +0000 > +++ b/xen/arch/x86/microcode_amd.c Mon Jan 30 18:00:50 2012 +0100 > @@ -337,13 +341,18 @@ static int cpu_request_microcode(int cpu > /* On success keep the microcode patch for > * re-apply on resume. > */ > - if (error == 1) > + if ( error == 1 ) > { > xfree(mc_old); > - return 0; > + error = 0; > + } else} else> + { > + xfree(mc_amd); > + uci->mc.mc_amd = mc_old; > } > - xfree(mc_amd); > - uci->mc.mc_amd = mc_old; > + > + out: > + svm_host_osvw_init(); > > return error; > } > --- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000 > +++ b/xen/arch/x86/platform_hypercall.c Mon Jan 30 18:00:50 2012 +0100 > @@ -166,7 +166,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe > break; > > guest_from_compat_handle(data, op->u.microcode.data); > + > + while ( !domctl_lock_acquire() )It''s not clear to me what you''re trying to protect against here. I don''t think this prevents guests from making progress (and hence possibly seeing the intermediate [and wrong] OSVW state between svm_host_osvw_reset() and svm_host_osvw_init()). Jan> + if ( hypercall_preempt_check() ) > + { > + ret = hypercall_create_continuation( > + __HYPERVISOR_platform_op, "h", u_xenpf_op); > + goto out; > + } > + > ret = microcode_update(data, op->u.microcode.length); > + domctl_lock_release(); > } > break; >
Christoph Egger
2012-Jan-31 10:36 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
On 01/31/12 11:13, Jan Beulich wrote:>> --- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000 >> +++ b/xen/arch/x86/platform_hypercall.c Mon Jan 30 18:00:50 2012 +0100 >> @@ -166,7 +166,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe >> break; >> >> guest_from_compat_handle(data, op->u.microcode.data); >> + >> + while ( !domctl_lock_acquire() ) > > It''s not clear to me what you''re trying to protect against here. I don''t > think this prevents guests from making progress (and hence possibly > seeing the intermediate [and wrong] OSVW state between > svm_host_osvw_reset() and svm_host_osvw_init()).The reason is because when we allocated VCPUs (via XEN_DOMCTL_max_vcpus and possibly other controls) we call svm_vcpu_initialise() -> svm_guest_osvw_init(). If we are in the middle of microcode update then osvw_length/osvw_status may be in an inconsistent state. Christoph> Jan > >> + if ( hypercall_preempt_check() ) >> + { >> + ret = hypercall_create_continuation( >> + __HYPERVISOR_platform_op, "h", u_xenpf_op); >> + goto out; >> + } >> + >> ret = microcode_update(data, op->u.microcode.length); >> + domctl_lock_release(); >> } >> break; >> > > >-- ---to satisfy European Law for business letters: Advanced Micro Devices GmbH Einsteinring 24, 85689 Dornach b. Muenchen Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 43632
Keir Fraser
2012-Jan-31 10:39 UTC
Re: [PATCH] x86/AMD: Add support for AMD's OSVW feature in guests
On 31/01/2012 10:36, "Christoph Egger" <Christoph.Egger@amd.com> wrote:> On 01/31/12 11:13, Jan Beulich wrote: >>> --- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000 >>> +++ b/xen/arch/x86/platform_hypercall.c Mon Jan 30 18:00:50 2012 +0100 >>> @@ -166,7 +166,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe >>> break; >>> >>> guest_from_compat_handle(data, op->u.microcode.data); >>> + >>> + while ( !domctl_lock_acquire() ) >> >> It''s not clear to me what you''re trying to protect against here. I don''t >> think this prevents guests from making progress (and hence possibly >> seeing the intermediate [and wrong] OSVW state between >> svm_host_osvw_reset() and svm_host_osvw_init()). > > The reason is because when we allocated VCPUs (via XEN_DOMCTL_max_vcpus > and possibly other controls) we call svm_vcpu_initialise() -> > svm_guest_osvw_init(). If we are in the middle of microcode update then > osvw_length/osvw_status may be in an inconsistent state.Deserves a code comment. Even better, deserves its own synchronisation (e.g., your own specific lock for this purpose). It would be nice to avoid further intricate dependencies on the big domctl lock, so we can consider removing it in future. -- Keir> Christoph > >> Jan >> >>> + if ( hypercall_preempt_check() ) >>> + { >>> + ret = hypercall_create_continuation( >>> + __HYPERVISOR_platform_op, "h", u_xenpf_op); >>> + goto out; >>> + } >>> + >>> ret = microcode_update(data, op->u.microcode.length); >>> + domctl_lock_release(); >>> } >>> break; >>> >> >> >> >