Boris Ostrovsky
2012-Feb-01 16:30 UTC
[PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@amd.com>
# Date 1328108207 -3600
# Node ID 789bbf4f478b0e81d71240a1f1147ef66a7c8848
# Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
x86/AMD: Add support for AMD's OSVW feature in guests.

In some cases guests should not provide workarounds for errata even when
the physical processor is affected. For example, because of erratum 400 on
family 10h processors a Linux guest will read an MSR (resulting in a
VMEXIT) before going idle in order to avoid getting stuck in a non-C0
state. This is not necessary: HLT and IO instructions are intercepted, so
there is no reason for the erratum 400 workaround in the guest.

This patch allows us to present a guest with certain errata as fixed,
regardless of the state of the actual hardware.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
Acked-by: Christoph Egger <Christoph.Egger@amd.com>

diff -r e2722b24dc09 -r 789bbf4f478b tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Wed Feb 01 15:56:47 2012 +0100
@@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
                     bitmaskof(X86_FEATURE_SSE4A) |
                     bitmaskof(X86_FEATURE_MISALIGNSSE) |
                     bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
+                    bitmaskof(X86_FEATURE_OSVW) |
                     bitmaskof(X86_FEATURE_XOP) |
                     bitmaskof(X86_FEATURE_FMA4) |
                     bitmaskof(X86_FEATURE_TBM) |
diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Feb 01 15:56:47 2012 +0100
@@ -83,6 +83,9 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
 
 static bool_t amd_erratum383_found __read_mostly;
 
+/* OSVW bits */
+static uint64_t osvw_length, osvw_status;
+
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
 {
     struct vcpu *curr = current;
@@ -902,6 +905,61 @@ static void svm_do_resume(struct vcpu *v
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
+static void svm_guest_osvw_init(struct vcpu *vcpu)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /*
+     * Guests should see errata 400 and 415 as fixed (assuming that
+     * HLT and IO instructions are intercepted).
+     */
+    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
+    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
+
+    /*
+     * By increasing VCPU's osvw.length to 3 we are telling the guest that
+     * all osvw.status bits inside that length, including bit 0 (which is
+     * reserved for erratum 298), are valid. However, if host processor's
+     * osvw_len is 0 then osvw_status[0] carries no information. We need to
+     * be conservative here and therefore we tell the guest that erratum 298
+     * is present (because we really don't know).
+     */
+    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
+        vcpu->arch.hvm_svm.osvw.status |= 1;
+}
+
+void svm_host_osvw_reset()
+{
+    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
+    osvw_status = 0;
+}
+
+void svm_host_osvw_init()
+{
+    /*
+     * Get OSVW bits. If bits are not the same on different processors then
+     * choose the worst case (i.e. if erratum is present on one processor and
+     * not on another assume that the erratum is present everywhere).
+     */
+    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
+    {
+        uint64_t len, status;
+
+        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
+             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
+            len = status = 0;
+
+        if (len < osvw_length)
+            osvw_length = len;
+
+        osvw_status |= status;
+        osvw_status &= (1ULL << osvw_length) - 1;
+    }
+    else
+        osvw_length = osvw_status = 0;
+}
+
 static int svm_domain_initialise(struct domain *d)
 {
     return 0;
@@ -930,6 +988,9 @@ static int svm_vcpu_initialise(struct vc
     }
 
     vpmu_initialise(v);
+
+    svm_guest_osvw_init(v);
+
     return 0;
 }
 
@@ -1044,6 +1105,27 @@ static void svm_init_erratum_383(struct
     }
 }
 
+static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read)
+{
+    uint eax, ebx, ecx, edx;
+
+    /* Guest OSVW support */
+    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
+        return -1;
+
+    if ( read )
+    {
+        if (msr == MSR_AMD_OSVW_ID_LENGTH)
+            *val = v->arch.hvm_svm.osvw.length;
+        else
+            *val = v->arch.hvm_svm.osvw.status;
+    }
+    /* Writes are ignored */
+
+    return 0;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -1094,6 +1176,9 @@ static int svm_cpu_up(void)
     }
 #endif
 
+    /* Initialize OSVW bits to be used by guests */
+    svm_host_osvw_init();
+
     return 0;
 }
 
@@ -1104,6 +1189,8 @@ struct hvm_function_table * __init start
     if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
         return NULL;
 
+    svm_host_osvw_reset();
+
     if ( svm_cpu_up() )
     {
         printk("SVM: failed to initialise.\n");
@@ -1388,6 +1475,13 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, msr_content, 1);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_rdmsr(v, msr, msr_content);
         if ( ret < 0 )
@@ -1512,6 +1606,13 @@ static int svm_msr_write_intercept(unsig
          */
         break;
 
+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, &msr_content, 0);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_wrmsr(v, msr, msr_content);
         if ( ret < 0 )
diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/microcode.c  Wed Feb 01 15:56:47 2012 +0100
@@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
     info->error = 0;
     info->cpu = cpumask_first(&cpu_online_map);
 
+    if ( microcode_ops->start_update )
+    {
+        ret = microcode_ops->start_update();
+        if ( ret != 0 )
+        {
+            xfree(info);
+            return ret;
+        }
+    }
+
     return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
 }
 
diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/microcode_amd.c
--- a/xen/arch/x86/microcode_amd.c      Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/microcode_amd.c      Wed Feb 01 15:56:47 2012 +0100
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
+#include <asm/hvm/svm/svm.h>
 
 struct equiv_cpu_entry {
     uint32_t installed_cpu;
@@ -287,7 +288,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! Wrong "
                "microcode patch file magic\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_amd = xmalloc(struct microcode_amd);
@@ -295,7 +297,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! "
                "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
+        error = -ENOMEM;
+        goto out;
     }
 
     error = install_equiv_cpu_table(mc_amd, buf, &offset);
@@ -303,7 +306,8 @@ static int cpu_request_microcode(int cpu
     {
         xfree(mc_amd);
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }
 
     mc_old = uci->mc.mc_amd;
@@ -337,13 +341,19 @@ static int cpu_request_microcode(int cpu
     /* On success keep the microcode patch for
      * re-apply on resume.
      */
-    if (error == 1)
+    if ( error == 1 )
     {
         xfree(mc_old);
-        return 0;
+        error = 0;
+    }
+    else
+    {
+        xfree(mc_amd);
+        uci->mc.mc_amd = mc_old;
     }
-    xfree(mc_amd);
-    uci->mc.mc_amd = mc_old;
+
+  out:
+    svm_host_osvw_init();
 
     return error;
 }
@@ -395,11 +405,19 @@ err1:
     return -ENOMEM;
 }
 
+static int start_update(void)
+{
+    svm_host_osvw_reset();
+
+    return 0;
+}
+
 static const struct microcode_ops microcode_amd_ops = {
     .microcode_resume_match = microcode_resume_match,
     .cpu_request_microcode = cpu_request_microcode,
     .collect_cpu_info = collect_cpu_info,
     .apply_microcode = apply_microcode,
+    .start_update = start_update,
 };
 
 static __init int microcode_init_amd(void)
diff -r e2722b24dc09 -r 789bbf4f478b xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/platform_hypercall.c Wed Feb 01 15:56:47 2012 +0100
@@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;
 
         guest_from_compat_handle(data, op->u.microcode.data);
+
+        /*
+         * alloc_vcpu() will access data which is modified during
+         * microcode update.
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
+                goto out;
+            }
+
         ret = microcode_update(data, op->u.microcode.length);
+        spin_unlock(&vcpu_alloc_lock);
     }
     break;
 
diff -r e2722b24dc09 -r 789bbf4f478b xen/common/domctl.c
--- a/xen/common/domctl.c       Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/common/domctl.c       Wed Feb 01 15:56:47 2012 +0100
@@ -29,6 +29,7 @@
 #include <xsm/xsm.h>
 
 static DEFINE_SPINLOCK(domctl_lock);
+DEFINE_SPINLOCK(vcpu_alloc_lock);
 
 int cpumask_to_xenctl_cpumap(
     struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
@@ -502,6 +503,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         /* Needed, for example, to ensure writable p.t. state is synced. */
         domain_pause(d);
 
+        /*
+         * Certain operations (e.g. CPU microcode updates) modify data
+         * which is used during VCPU allocation/initialization.
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_domctl, "h", u_domctl);
+                goto maxvcpu_out_novcpulock;
+            }
+
         /* We cannot reduce maximum VCPUs. */
         ret = -EINVAL;
         if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
@@ -551,6 +564,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         ret = 0;
 
     maxvcpu_out:
+        spin_unlock(&vcpu_alloc_lock);
+
+    maxvcpu_out_novcpulock:
         domain_unpause(d);
         rcu_unlock_domain(d);
     }
diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/hvm/svm/svm.h Wed Feb 01 15:56:47 2012 +0100
@@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
             ~TSC_RATIO_RSVD_BITS )
 #define vcpu_tsc_ratio(v)   TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
 
+extern void svm_host_osvw_reset(void);
+extern void svm_host_osvw_init(void);
+
 #endif /* __ASM_X86_HVM_SVM_H__ */
diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Wed Feb 01 15:56:47 2012 +0100
@@ -516,6 +516,12 @@ struct arch_svm_struct {
 
     /* AMD lightweight profiling MSR */
     uint64_t guest_lwp_cfg;
+
+    /* OSVW MSRs */
+    struct {
+        u64 length;
+        u64 status;
+    } osvw;
 };
 
 struct vmcb_struct *alloc_vmcb(void);
diff -r e2722b24dc09 -r 789bbf4f478b xen/include/asm-x86/microcode.h
--- a/xen/include/asm-x86/microcode.h   Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/microcode.h   Wed Feb 01 15:56:47 2012 +0100
@@ -11,6 +11,7 @@ struct microcode_ops {
     int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
     int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
     int (*apply_microcode)(int cpu);
+    int (*start_update)(void);
 };
 
 struct cpu_signature {
diff -r e2722b24dc09 -r 789bbf4f478b xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/xen/domain.h  Wed Feb 01 15:56:47 2012 +0100
@@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
 
 void arch_vcpu_reset(struct vcpu *v);
 
+extern spinlock_t vcpu_alloc_lock;
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
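
For context: OSVW ("OS Visible Workaround") consists of an ID-length
register (MSRC001_0140) and status bits starting at MSRC001_0141; status
bit n tells the OS whether the erratum with OSVW ID n is present, and only
bits below the advertised length are valid. Erratum 400 has OSVW ID 1 and
erratum 415 has ID 2, which is why the patch clears bits 1 and 2 (the
& ~6ULL) in the guest's view. A rough sketch of the guest-side check this
makes unnecessary follows; it is illustrative only, and rdmsr64() is an
assumed helper, not a real guest or Xen function:

    /*
     * Sketch: how a guest kernel decides whether to apply the erratum
     * 400 idle workaround.  MSR numbers match the patch's
     * MSR_AMD_OSVW_* constants; rdmsr64() is a made-up accessor.
     */
    #include <stdint.h>
    #include <stdbool.h>

    #define MSR_AMD_OSVW_ID_LENGTH  0xc0010140
    #define MSR_AMD_OSVW_STATUS     0xc0010141

    extern uint64_t rdmsr64(uint32_t msr);  /* assumed MSR accessor */

    static bool erratum_400_present(void)
    {
        uint64_t length = rdmsr64(MSR_AMD_OSVW_ID_LENGTH);

        /* Status bit 1 is only meaningful if the length covers it. */
        if ( length <= 1 )
            return true;  /* unknown -- conservatively assume affected */

        return (rdmsr64(MSR_AMD_OSVW_STATUS) >> 1) & 1;
    }

With the patch applied a guest sees length >= 3 and status bit 1 clear, so
a check like this returns false and the guest skips the MSR read (and its
VMEXIT) before going idle.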
Jan Beulich
2012-Feb-02 13:22 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 01.02.12 at 17:30, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
> In some cases guests should not provide workarounds for errata even when
> the physical processor is affected. For example, because of erratum 400
> on family 10h processors a Linux guest will read an MSR (resulting in a
> VMEXIT) before going idle in order to avoid getting stuck in a non-C0
> state. This is not necessary: HLT and IO instructions are intercepted,
> so there is no reason for the erratum 400 workaround in the guest.
>
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of the actual hardware.

As I was about to apply this to my local tree to give it a try, I had to
realize that the microcode integration is still not correct: there is (at
least from an abstract perspective) no guarantee that
cpu_request_microcode() is called on all CPUs, yet you want
svm_host_osvw_init() to be re-run on all of them. If you choose not to
deal with this in a formally correct way, it should be stated in a code
comment (to lower the risk of surprises when someone later touches that
code) that the problem cannot arise in practice because
collect_cpu_info() won't currently fail for the CPUs of interest.

Further, you need to serialize against onlining of CPUs while in
svm_host_osvw_reset(), and similarly protect the updating of the globals
in svm_host_osvw_init(). A private lock shared between those two
functions is probably the best route to go here.
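
For concreteness, a minimal sketch of the private lock suggested above,
applied to the two functions from the patch (the lock name and exact
placement are assumptions, not code from the posted series):

    /* Hypothetical private lock serializing OSVW reset and re-scan. */
    static DEFINE_SPINLOCK(osvw_lock);

    void svm_host_osvw_reset(void)
    {
        spin_lock(&osvw_lock);

        osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
        osvw_status = 0;

        spin_unlock(&osvw_lock);
    }

    void svm_host_osvw_init(void)
    {
        spin_lock(&osvw_lock);

        /* Same worst-case merge as in the patch, now under the lock. */
        if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
        {
            uint64_t len, status;

            if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
                 rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
                len = status = 0;

            if ( len < osvw_length )
                osvw_length = len;

            osvw_status |= status;
            osvw_status &= (1ULL << osvw_length) - 1;
        }
        else
            osvw_length = osvw_status = 0;

        spin_unlock(&osvw_lock);
    }

A CPU being onlined (svm_cpu_up() -> svm_host_osvw_init()) then serializes
against a concurrent reset and re-scan triggered from the microcode-update
path.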
Boris Ostrovsky
2012-Feb-02 20:29 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
On 02/02/12 08:22, Jan Beulich wrote:
> As I was about to apply this to my local tree to give it a try, I had to
> realize that the microcode integration is still not correct: there is
> (at least from an abstract perspective) no guarantee that
> cpu_request_microcode() is called on all CPUs, yet you want
> svm_host_osvw_init() to be re-run on all of them. If you choose not to
> deal with this in a formally correct way, it should be stated in a code
> comment (to lower the risk of surprises when someone later touches that
> code) that the problem cannot arise in practice because
> collect_cpu_info() won't currently fail for the CPUs of interest.

What if svm_host_osvw_init() is called from collect_cpu_info()? There may
be cases where svm_host_osvw_init() is called multiple times for the same
CPU, but that should be harmless (and the routine will be renamed to
svm_host_osvw_update()).

This would change the scope of what collect_cpu_info() is expected to do,
though. But then one could argue that stashing OSVW bits is to some extent
also collecting CPU info, albeit for a different purpose.

-boris
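
In code, the proposal would look roughly like the sketch below. The body
of collect_cpu_info() is paraphrased from the AMD microcode driver and
svm_host_osvw_update() is the renamed hook, so treat both as assumptions
rather than the actual change:

    static int collect_cpu_info(int cpu, struct cpu_signature *csig)
    {
        struct cpuinfo_x86 *c = &cpu_data[cpu];
        uint32_t dummy;

        memset(csig, 0, sizeof(*csig));

        /* Fails only for non-AMD or pre-Fam10 processors. */
        if ( (c->x86_vendor != X86_VENDOR_AMD) || (c->x86 < 0x10) )
            return -EINVAL;

        rdmsr(MSR_AMD_PATCHLEVEL, csig->rev, dummy);

        /*
         * Proposed addition: fold this CPU's OSVW bits into the
         * host-wide globals on every (re-)collection.
         */
        svm_host_osvw_update();

        return 0;
    }

Because collect_cpu_info() runs both on the microcode-update path and when
a CPU comes online, the globals would see every CPU without a separate
hook.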
Jan Beulich
2012-Feb-03 07:53 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 02.02.12 at 21:29, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
> What if svm_host_osvw_init() is called from collect_cpu_info()? There
> may be cases where svm_host_osvw_init() is called multiple times for the
> same CPU, but that should be harmless (and the routine will be renamed
> to svm_host_osvw_update()).

Wouldn't that result in workaround bits that might get cleared by the
pending microcode update getting (and remaining) set, since they are being
or-ed together over all invocations of the function after any
svm_host_osvw_reset()?

Jan
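
Spelled out with invented values, the sequence being described is:

    /*
     * Illustration of the concern (values hypothetical):
     *
     *   svm_host_osvw_reset();     // osvw_status = 0, osvw_length = 64
     *
     *   // collect_cpu_info() runs on CPU0 before the new microcode is
     *   // applied; the hardware still reports erratum 400 (OSVW bit 1):
     *   svm_host_osvw_update();    // osvw_status |= 0x2 -> bit 1 set
     *
     *   apply_microcode(0);        // update fixes the erratum; the
     *                              // hardware clears its status bit
     *
     *   // a later pass reads OSVW_STATUS == 0, but or-ing cannot
     *   // clear anything:
     *   svm_host_osvw_update();    // osvw_status |= 0 -> bit 1 stays set
     */

The stale bit then persists until the next svm_host_osvw_reset().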
Boris Ostrovsky
2012-Feb-03 16:13 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
On 02/03/12 02:53, Jan Beulich wrote:
> Wouldn't that result in workaround bits that might get cleared by the
> pending microcode update getting (and remaining) set, since they are
> being or-ed together over all invocations of the function after any
> svm_host_osvw_reset()?

I think that would be OK, though not optimal: more bits will end up being
set than necessary, meaning that workarounds will be applied where they
may not be required. But that should not affect correctness. I am not sure
it's worth optimizing for this case, since onlining a core in the middle
of a microcode update is a rather uncommon occurrence.

What could have been a problem is the case where a core that is coming up
has more bits set than the other cores and doesn't get to go through
cpu_request_microcode(). Since collect_cpu_info() is always invoked on the
onlining path, we will not miss a call into svm_host_osvw_init().

-boris
Jan Beulich
2012-Feb-03 16:25 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 03.02.12 at 17:13, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
> I think that would be OK, though not optimal: more bits will end up
> being set than necessary, meaning that workarounds will be applied where
> they may not be required. But that should not affect correctness. I am
> not sure it's worth optimizing for this case, since onlining a core in
> the middle of a microcode update is a rather uncommon occurrence.

How is that related to CPU onlining? If you put the call into
collect_cpu_info(), that will affect all CPUs currently online as well
(and, as I said, negate the effect of any OSVW bits that the pending
microcode update might have cleared). The only case in which calling
svm_host_osvw_init() from collect_cpu_info() would be correct is when the
latter returns an error, which, as I noted before, can only happen for
non-AMD or pre-Fam10 CPUs (on all of which calling the function is
pointless). So we're back to me asking that this simply be explained in a
code comment in lieu of a proper abstraction of the situation.

Jan
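
A possible wording for that comment, at the svm_host_osvw_init() call site
in cpu_request_microcode() (the wording is illustrative, not Jan's):

      out:
        /*
         * Formally there is no guarantee that cpu_request_microcode()
         * runs on every CPU.  In practice it does: collect_cpu_info()
         * only fails for non-AMD or pre-Fam10 processors, neither of
         * which has OSVW, so every CPU of interest reaches this point
         * and re-reads its OSVW bits.
         */
        svm_host_osvw_init();

        return error;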
Boris Ostrovsky
2012-Feb-03 16:48 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
On 02/03/12 11:25, Jan Beulich wrote:
> How is that related to CPU onlining? If you put the call into
> collect_cpu_info(), that will affect all CPUs currently online as well
> (and, as I said, negate the effect of any OSVW bits that the pending
> microcode update might have cleared). [...]

OK, I misunderstood your original comment then. I thought you were
talking about a race between a microcode update and onlining a core.

-boris
Jan Beulich
2012-Feb-03 16:59 UTC
Re: [PATCH v4] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 03.02.12 at 17:48, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
> OK, I misunderstood your original comment then. I thought you were
> talking about a race between a microcode update and onlining a core.

That was a second concern, easily taken care of by adding locking in
_init() and _reset().

Jan