Boris Ostrovsky
2012-Feb-06 17:39 UTC
[PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@amd.com>
# Date 1328549858 -3600
# Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
# Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
x86/AMD: Add support for AMD's OSVW feature in guests.

In some cases guests should not provide workarounds for errata even when the
physical processor is affected. For example, because of erratum 400 on family
10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
going to idle in order to avoid getting stuck in a non-C0 state. This is not
necessary: HLT and IO instructions are intercepted and therefore there is no
reason for erratum 400 workaround in the guest.

This patch allows us to present a guest with certain errata as fixed,
regardless of the state of actual hardware.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
Acked-by: Christoph Egger <Christoph.Egger@amd.com>

diff -r e2722b24dc09 -r 3cf8ffd0ab88 tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Mon Feb 06 18:37:38 2012 +0100
@@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
             bitmaskof(X86_FEATURE_SSE4A) |
             bitmaskof(X86_FEATURE_MISALIGNSSE) |
             bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
+            bitmaskof(X86_FEATURE_OSVW) |
             bitmaskof(X86_FEATURE_XOP) |
             bitmaskof(X86_FEATURE_FMA4) |
             bitmaskof(X86_FEATURE_TBM) |
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Feb 06 18:37:38 2012 +0100
@@ -83,6 +83,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *

 static bool_t amd_erratum383_found __read_mostly;

+/* OSVW bits */
+static uint64_t osvw_length, osvw_status;
+static DEFINE_SPINLOCK(osvw_lock);
+
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
 {
     struct vcpu *curr = current;
@@ -902,6 +906,69 @@ static void svm_do_resume(struct vcpu *v
     reset_stack_and_jump(svm_asm_do_resume);
 }

+static void svm_guest_osvw_init(struct vcpu *vcpu)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /*
+     * Guests should see errata 400 and 415 as fixed (assuming that
+     * HLT and IO instructions are intercepted).
+     */
+    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
+    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
+
+    /*
+     * By increasing VCPU's osvw.length to 3 we are telling the guest that
+     * all osvw.status bits inside that length, including bit 0 (which is
+     * reserved for erratum 298), are valid. However, if host processor's
+     * osvw_len is 0 then osvw_status[0] carries no information. We need to
+     * be conservative here and therefore we tell the guest that erratum 298
+     * is present (because we really don't know).
+     */
+    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
+        vcpu->arch.hvm_svm.osvw.status |= 1;
+}
+
+void svm_host_osvw_reset()
+{
+    spin_lock(&osvw_lock);
+
+    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
+    osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
+void svm_host_osvw_init()
+{
+    spin_lock(&osvw_lock);
+
+    /*
+     * Get OSVW bits. If bits are not the same on different processors then
+     * choose the worst case (i.e. if erratum is present on one processor and
+     * not on another assume that the erratum is present everywhere).
+     */
+    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
+    {
+        uint64_t len, status;
+
+        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
+             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
+            len = status = 0;
+
+        if (len < osvw_length)
+            osvw_length = len;
+
+        osvw_status |= status;
+        osvw_status &= (1ULL << osvw_length) - 1;
+    }
+    else
+        osvw_length = osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
 static int svm_domain_initialise(struct domain *d)
 {
     return 0;
@@ -930,6 +997,9 @@ static int svm_vcpu_initialise(struct vc
     }

     vpmu_initialise(v);
+
+    svm_guest_osvw_init(v);
+
     return 0;
 }

@@ -1044,6 +1114,27 @@ static void svm_init_erratum_383(struct
     }
 }

+static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read)
+{
+    uint eax, ebx, ecx, edx;
+
+    /* Guest OSVW support */
+    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
+        return -1;
+
+    if ( read )
+    {
+        if (msr == MSR_AMD_OSVW_ID_LENGTH)
+            *val = v->arch.hvm_svm.osvw.length;
+        else
+            *val = v->arch.hvm_svm.osvw.status;
+    }
+    /* Writes are ignored */
+
+    return 0;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -1094,6 +1185,9 @@ static int svm_cpu_up(void)
     }
 #endif

+    /* Initialize OSVW bits to be used by guests */
+    svm_host_osvw_init();
+
     return 0;
 }

@@ -1104,6 +1198,8 @@ struct hvm_function_table * __init start
     if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
         return NULL;

+    svm_host_osvw_reset();
+
     if ( svm_cpu_up() )
     {
         printk("SVM: failed to initialise.\n");
@@ -1388,6 +1484,13 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;

+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, msr_content, 1);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_rdmsr(v, msr, msr_content);
         if ( ret < 0 )
@@ -1512,6 +1615,13 @@ static int svm_msr_write_intercept(unsig
          */
         break;

+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, &msr_content, 0);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_wrmsr(v, msr, msr_content);
         if ( ret < 0 )
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/microcode.c  Mon Feb 06 18:37:38 2012 +0100
@@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
     info->error = 0;
     info->cpu = cpumask_first(&cpu_online_map);

+    if ( microcode_ops->start_update )
+    {
+        ret = microcode_ops->start_update();
+        if ( ret != 0 )
+        {
+            xfree(info);
+            return ret;
+        }
+    }
+
     return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
 }

diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/microcode_amd.c
--- a/xen/arch/x86/microcode_amd.c      Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/microcode_amd.c      Mon Feb 06 18:37:38 2012 +0100
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
+#include <asm/hvm/svm/svm.h>

 struct equiv_cpu_entry {
     uint32_t installed_cpu;
@@ -71,6 +72,7 @@ struct mpbhdr {
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);

+/* See comment in start_update() for cases when this routine fails */
 static int collect_cpu_info(int cpu, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu];
@@ -287,7 +289,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! Wrong "
                "microcode patch file magic\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }

     mc_amd = xmalloc(struct microcode_amd);
@@ -295,7 +298,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! "
                "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
+        error = -ENOMEM;
+        goto out;
     }

     error = install_equiv_cpu_table(mc_amd, buf, &offset);
@@ -303,7 +307,8 @@ static int cpu_request_microcode(int cpu
     {
         xfree(mc_amd);
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }

     mc_old = uci->mc.mc_amd;
@@ -337,13 +342,19 @@ static int cpu_request_microcode(int cpu
     /* On success keep the microcode patch for
      * re-apply on resume.
      */
-    if (error == 1)
+    if ( error == 1 )
     {
         xfree(mc_old);
-        return 0;
+        error = 0;
+    }
+    else
+    {
+        xfree(mc_amd);
+        uci->mc.mc_amd = mc_old;
     }
-    xfree(mc_amd);
-    uci->mc.mc_amd = mc_old;
+
+ out:
+    svm_host_osvw_init();

     return error;
 }
@@ -395,11 +406,28 @@ err1:
     return -ENOMEM;
 }

+static int start_update(void)
+{
+    /*
+     * We assume here that svm_host_osvw_init() will be called on each cpu (from
+     * cpu_request_microcode()).
+     *
+     * Note that if collect_cpu_info() returns an error then
+     * cpu_request_microcode() will not be invoked thus leaving OSVW bits not
+     * updated. Currently though collect_cpu_info() will not fail on processors
+     * supporting OSVW so we will not deal with this possibility.
+     */
+    svm_host_osvw_reset();
+
+    return 0;
+}
+
 static const struct microcode_ops microcode_amd_ops = {
     .microcode_resume_match = microcode_resume_match,
     .cpu_request_microcode = cpu_request_microcode,
     .collect_cpu_info = collect_cpu_info,
     .apply_microcode = apply_microcode,
+    .start_update = start_update,
 };

 static __init int microcode_init_amd(void)
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/arch/x86/platform_hypercall.c Mon Feb 06 18:37:38 2012 +0100
@@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;

         guest_from_compat_handle(data, op->u.microcode.data);
+
+        /*
+         * alloc_vpcu() will access data which is modified during
+         * microcode update
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
+                goto out;
+            }
+
         ret = microcode_update(data, op->u.microcode.length);
+        spin_unlock(&vcpu_alloc_lock);
     }
     break;

diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/common/domctl.c
--- a/xen/common/domctl.c       Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/common/domctl.c       Mon Feb 06 18:37:38 2012 +0100
@@ -29,6 +29,7 @@
 #include <xsm/xsm.h>

 static DEFINE_SPINLOCK(domctl_lock);
+DEFINE_SPINLOCK(vcpu_alloc_lock);

 int cpumask_to_xenctl_cpumap(
     struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
@@ -502,6 +503,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         /* Needed, for example, to ensure writable p.t. state is synced. */
         domain_pause(d);

+        /*
+         * Certain operations (e.g. CPU microcode updates) modify data which is
+         * used during VCPU allocation/initialization
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_domctl, "h", u_domctl);
+                goto maxvcpu_out_novcpulock;
+            }
+
         /* We cannot reduce maximum VCPUs. */
         ret = -EINVAL;
         if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
@@ -551,6 +564,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         ret = 0;

     maxvcpu_out:
+        spin_unlock(&vcpu_alloc_lock);
+
+    maxvcpu_out_novcpulock:
         domain_unpause(d);
         rcu_unlock_domain(d);
     }
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/hvm/svm/svm.h Mon Feb 06 18:37:38 2012 +0100
@@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
                               ~TSC_RATIO_RSVD_BITS )
 #define vcpu_tsc_ratio(v)     TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)

+extern void svm_host_osvw_reset(void);
+extern void svm_host_osvw_init(void);
+
 #endif /* __ASM_X86_HVM_SVM_H__ */
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Feb 06 18:37:38 2012 +0100
@@ -516,6 +516,12 @@ struct arch_svm_struct {

     /* AMD lightweight profiling MSR */
     uint64_t guest_lwp_cfg;
+
+    /* OSVW MSRs */
+    struct {
+        u64 length;
+        u64 status;
+    } osvw;
 };

 struct vmcb_struct *alloc_vmcb(void);
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/asm-x86/microcode.h
--- a/xen/include/asm-x86/microcode.h   Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/asm-x86/microcode.h   Mon Feb 06 18:37:38 2012 +0100
@@ -11,6 +11,7 @@ struct microcode_ops {
     int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
     int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
     int (*apply_microcode)(int cpu);
+    int (*start_update)(void);
 };

 struct cpu_signature {
diff -r e2722b24dc09 -r 3cf8ffd0ab88 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Thu Jan 26 17:43:31 2012 +0000
+++ b/xen/include/xen/domain.h  Mon Feb 06 18:37:38 2012 +0100
@@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain

 void arch_vcpu_reset(struct vcpu *v);

+extern spinlock_t vcpu_alloc_lock;
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
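[Editor's note: for readers unfamiliar with the feature, OSVW exposes a length
MSR (how many status bits are valid) and a status MSR (one bit per erratum; a
set bit means the erratum is present and the OS should apply its workaround).
Below is a minimal, hypothetical C sketch of the guest-side check that the
virtualized MSRs above feed into; the rdmsr() accessor and the conservative
fallback policy are illustrative assumptions, not part of the patch.

    #include <stdint.h>

    #define MSR_AMD_OSVW_ID_LENGTH 0xc0010140  /* number of valid status bits */
    #define MSR_AMD_OSVW_STATUS    0xc0010141  /* one bit per erratum */

    uint64_t rdmsr(uint32_t msr);  /* assumed MSR accessor */

    /* osvw_id 1 is erratum 400 and osvw_id 2 is erratum 415 -- the two
     * bits the patch clears for guests via "osvw_status & ~(6ULL)". */
    static int osvw_erratum_present(unsigned int osvw_id)
    {
        uint64_t len = rdmsr(MSR_AMD_OSVW_ID_LENGTH);

        /* Bits at or beyond 'len' carry no information; a real OS would
         * fall back to a family/model range check here. We conservatively
         * assume the erratum is present. */
        if ( osvw_id >= len )
            return 1;

        return (rdmsr(MSR_AMD_OSVW_STATUS) >> osvw_id) & 1;
    }

With the patch applied, a guest running such a check for erratum 400 sees
osvw.length >= 3 and status bit 1 clear, so it keeps the plain HLT idle path
instead of the MSR-reading workaround.]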
Konrad Rzeszutek Wilk
2012-Feb-06 17:47 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@amd.com>
> # Date 1328549858 -3600
> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
>
> In some cases guests should not provide workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on family
> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
> going to idle in order to avoid getting stuck in a non-C0 state. This is not

What about fixing the Linux guest to actually not do this? I presume you
have encountered this with HVM guests - would it be possible to use

set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
'xen_hvm_guest_init' ??

> necessary: HLT and IO instructions are intercepted and therefore there is no
> reason for erratum 400 workaround in the guest.
>
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of actual hardware.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
> Acked-by: Christoph Egger <Christoph.Egger@amd.com>
>
> [...]
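[Editor's note: to make Konrad's suggestion concrete, the mechanism at issue
is Linux's pm_idle function pointer: the erratum-400 check swaps in an
MSR-reading idle routine, and the proposal is to force the pointer back to
the plain HLT loop during Xen HVM setup. The stand-alone sketch below uses
hypothetical stand-ins (default_idle, amd_e400_idle, set_pm_idle_to_default);
the real kernel helpers and their signatures may differ from what is shown.

    #include <stdio.h>

    /* Hypothetical stand-ins for the kernel's idle routines. */
    static void default_idle(void)  { puts("hlt"); }
    static void amd_e400_idle(void) { puts("rdmsr (a VMEXIT under Xen), then hlt"); }

    /* The kernel selects an idle routine at boot via a function pointer. */
    static void (*pm_idle)(void) = amd_e400_idle;

    /* Konrad's suggestion, in essence: early in the HVM init path
     * (e.g. xen_hvm_guest_init()), reset the pointer to the default. */
    static void set_pm_idle_to_default(void)
    {
        pm_idle = default_idle;
    }

    int main(void)
    {
        set_pm_idle_to_default();
        pm_idle();  /* now just HLT -- no erratum-400 MSR read per idle entry */
        return 0;
    }
]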
Boris Ostrovsky
2012-Feb-06 18:44 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On 02/06/12 12:47, Konrad Rzeszutek Wilk wrote:
> On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
>> # HG changeset patch
>> # User Boris Ostrovsky<boris.ostrovsky@amd.com>
>> # Date 1328549858 -3600
>> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
>> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
>> x86/AMD: Add support for AMD's OSVW feature in guests.
>>
>> In some cases guests should not provide workarounds for errata even when the
>> physical processor is affected. For example, because of erratum 400 on family
>> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
>> going to idle in order to avoid getting stuck in a non-C0 state. This is not
>
> What about fixing the Linux guest to actually not do this? I presume you
> have encountered this with HVM guests - would it be possible to use
>
> set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
> 'xen_hvm_guest_init' ??


Changing Linux won't help guests running Linux prior to the change so we
still need this patch. And with the patch guest's pm_idle will be set to
default_idle anyway (because cpu_has_amd_erratum(amd_erratum_400) will
return 0).

-boris

>
>
>> necessary: HLT and IO instructions are intercepted and therefore there is no
>> reason for erratum 400 workaround in the guest.
>>
>> This patch allows us to present a guest with certain errata as fixed,
>> regardless of the state of actual hardware.
Konrad Rzeszutek Wilk
2012-Feb-06 23:08 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On Mon, Feb 06, 2012 at 01:44:27PM -0500, Boris Ostrovsky wrote:
> On 02/06/12 12:47, Konrad Rzeszutek Wilk wrote:
> >On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
> >># HG changeset patch
> >># User Boris Ostrovsky<boris.ostrovsky@amd.com>
> >># Date 1328549858 -3600
> >># Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> >># Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> >>x86/AMD: Add support for AMD's OSVW feature in guests.
> >>
> >>In some cases guests should not provide workarounds for errata even when the
> >>physical processor is affected. For example, because of erratum 400 on family
> >>10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
> >>going to idle in order to avoid getting stuck in a non-C0 state. This is not
> >
> >What about fixing the Linux guest to actually not do this? I presume you
> >have encountered this with HVM guests - would it be possible to use
> >
> >set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
> >'xen_hvm_guest_init' ??
>
>
> Changing Linux won't help guests running Linux prior to the change
> so we still need this patch. And with the patch guest's pm_idle will

Well, it can go on the stable tree.

> be set to default_idle anyway (because
> cpu_has_amd_erratum(amd_erratum_400) will return 0).

Right, but that won't be called anymore to set the pm_idle b/c pm_idle
is set to default_idle.

>
>
> -boris
>
> >
> >
> >>necessary: HLT and IO instructions are intercepted and therefore there is no
> >>reason for erratum 400 workaround in the guest.
> >>
> >>This patch allows us to present a guest with certain errata as fixed,
> >>regardless of the state of actual hardware.
>
Keir Fraser
2012-Feb-07 05:15 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On 07/02/2012 11:51, "Jan Beulich" <JBeulich@suse.com> wrote:

>>>> On 06.02.12 at 18:39, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
>> # HG changeset patch
>> # User Boris Ostrovsky <boris.ostrovsky@amd.com>
>> # Date 1328549858 -3600
>> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
>> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
>> x86/AMD: Add support for AMD's OSVW feature in guests.
>>
>> In some cases guests should not provide workarounds for errata even when the
>> physical processor is affected. For example, because of erratum 400 on
>> family
>> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
>> going to idle in order to avoid getting stuck in a non-C0 state. This is not
>> necessary: HLT and IO instructions are intercepted and therefore there is no
>> reason for erratum 400 workaround in the guest.
>>
>> This patch allows us to present a guest with certain errata as fixed,
>> regardless of the state of actual hardware.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
>> Acked-by: Christoph Egger <Christoph.Egger@amd.com>
>
> In the form below/attached (integration with boot time microcode
> loading fixed and trailing white space removed)
>
> Acked-by: Jan Beulich <jbeulich@suse.com>

There's one vpcu typo in a comment. Also the trylock while loops would
preferably have braces. Apart from that...

Acked-by: Keir Fraser <keir@xen.org>

> -- Jan
>
> [...]
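[Editor's note: for reference, this is the loop Keir means, with the braces
added -- a sketch of the style fix only, not necessarily the committed form.
The trylock/preempt-check combination avoids spinning on a contended lock
inside a hypercall: if a preemption is pending, the operation is restarted
later via a continuation instead.

    /* Braced form of the trylock loop from do_platform_op(); identical logic. */
    while ( !spin_trylock(&vcpu_alloc_lock) )
    {
        if ( hypercall_preempt_check() )
        {
            ret = hypercall_create_continuation(
                __HYPERVISOR_platform_op, "h", u_xenpf_op);
            goto out;
        }
    }
]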
Jan Beulich
2012-Feb-07 08:26 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 06.02.12 at 18:47, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
>> # HG changeset patch
>> # User Boris Ostrovsky <boris.ostrovsky@amd.com>
>> # Date 1328549858 -3600
>> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
>> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
>> x86/AMD: Add support for AMD's OSVW feature in guests.
>>
>> In some cases guests should not provide workarounds for errata even when the
>> physical processor is affected. For example, because of erratum 400 on family
>> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
>> going to idle in order to avoid getting stuck in a non-C0 state. This is not
>
> What about fixing the Linux guest to actually not do this? I presume you
> have encountered this with HVM guests - would it be possible to use
>
> set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
> 'xen_hvm_guest_init' ??

Are you saying this would work even with CONFIG_XEN not set (which I
doubt, but is a valid case to consider)?

Also, this sort of adjustment is exactly what is being done better
in the hypervisor, as it'll address the problem at hand for all guests,
no matter what kernel (version or kind) they run.

Jan
Christoph Egger
2012-Feb-07 09:12 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On 02/06/12 19:44, Boris Ostrovsky wrote:
> On 02/06/12 12:47, Konrad Rzeszutek Wilk wrote:
>> On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
>>> # HG changeset patch
>>> # User Boris Ostrovsky<boris.ostrovsky@amd.com>
>>> # Date 1328549858 -3600
>>> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
>>> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
>>> x86/AMD: Add support for AMD's OSVW feature in guests.
>>>
>>> In some cases guests should not provide workarounds for errata even
>>> when the
>>> physical processor is affected. For example, because of erratum 400
>>> on family
>>> 10h processors a Linux guest will read an MSR (resulting in VMEXIT)
>>> before
>>> going to idle in order to avoid getting stuck in a non-C0 state. This
>>> is not
>>
>> What about fixing the Linux guest to actually not do this? I presume you
>> have encountered this with HVM guests - would it be possible to use
>>
>> set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
>> 'xen_hvm_guest_init' ??
>
>
> Changing Linux won't help guests running Linux prior to the change so we
> still need this patch. And with the patch guest's pm_idle will be set to
> default_idle anyway (because cpu_has_amd_erratum(amd_erratum_400) will
> return 0).

Right. Also fixing Linux won't help non-Linux guests, too.

Christoph

>
>
> -boris
>
>>
>>
>>> necessary: HLT and IO instructions are intercepted and therefore
>>> there is no
>>> reason for erratum 400 workaround in the guest.
>>>
>>> This patch allows us to present a guest with certain errata as fixed,
>>> regardless of the state of actual hardware.
>
Jan Beulich
2012-Feb-07 11:51 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
>>> On 06.02.12 at 18:39, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
> # HG changeset patch
> # User Boris Ostrovsky <boris.ostrovsky@amd.com>
> # Date 1328549858 -3600
> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> x86/AMD: Add support for AMD's OSVW feature in guests.
>
> In some cases guests should not provide workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on
> family
> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
> going to idle in order to avoid getting stuck in a non-C0 state. This is not
> necessary: HLT and IO instructions are intercepted and therefore there is no
> reason for erratum 400 workaround in the guest.
>
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of actual hardware.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
> Acked-by: Christoph Egger <Christoph.Egger@amd.com>

In the form below/attached (integration with boot time microcode
loading fixed and trailing white space removed)

Acked-by: Jan Beulich <jbeulich@suse.com>

-- Jan

In some cases guests should not provide workarounds for errata even when the
physical processor is affected. For example, because of erratum 400 on family
10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
going to idle in order to avoid getting stuck in a non-C0 state. This is not
necessary: HLT and IO instructions are intercepted and therefore there is no
reason for erratum 400 workaround in the guest.

This patch allows us to present a guest with certain errata as fixed,
regardless of the state of actual hardware.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
Acked-by: Christoph Egger <Christoph.Egger@amd.com>
Acked-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
             bitmaskof(X86_FEATURE_SSE4A) |
             bitmaskof(X86_FEATURE_MISALIGNSSE) |
             bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
+            bitmaskof(X86_FEATURE_OSVW) |
             bitmaskof(X86_FEATURE_XOP) |
             bitmaskof(X86_FEATURE_FMA4) |
             bitmaskof(X86_FEATURE_TBM) |
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -83,6 +83,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *

 static bool_t amd_erratum383_found __read_mostly;

+/* OSVW bits */
+static uint64_t osvw_length, osvw_status;
+static DEFINE_SPINLOCK(osvw_lock);
+
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
 {
     struct vcpu *curr = current;
@@ -902,6 +906,69 @@ static void svm_do_resume(struct vcpu *v
     reset_stack_and_jump(svm_asm_do_resume);
 }

+static void svm_guest_osvw_init(struct vcpu *vcpu)
+{
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /*
+     * Guests should see errata 400 and 415 as fixed (assuming that
+     * HLT and IO instructions are intercepted).
+     */
+    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
+    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
+
+    /*
+     * By increasing VCPU's osvw.length to 3 we are telling the guest that
+     * all osvw.status bits inside that length, including bit 0 (which is
+     * reserved for erratum 298), are valid. However, if host processor's
+     * osvw_len is 0 then osvw_status[0] carries no information. We need to
+     * be conservative here and therefore we tell the guest that erratum 298
+     * is present (because we really don't know).
+     */
+    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
+        vcpu->arch.hvm_svm.osvw.status |= 1;
+}
+
+void svm_host_osvw_reset()
+{
+    spin_lock(&osvw_lock);
+
+    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
+    osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
+void svm_host_osvw_init()
+{
+    spin_lock(&osvw_lock);
+
+    /*
+     * Get OSVW bits. If bits are not the same on different processors then
+     * choose the worst case (i.e. if erratum is present on one processor and
+     * not on another assume that the erratum is present everywhere).
+     */
+    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
+    {
+        uint64_t len, status;
+
+        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
+             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
+            len = status = 0;
+
+        if (len < osvw_length)
+            osvw_length = len;
+
+        osvw_status |= status;
+        osvw_status &= (1ULL << osvw_length) - 1;
+    }
+    else
+        osvw_length = osvw_status = 0;
+
+    spin_unlock(&osvw_lock);
+}
+
 static int svm_domain_initialise(struct domain *d)
 {
     return 0;
@@ -930,6 +997,9 @@ static int svm_vcpu_initialise(struct vc
     }

     vpmu_initialise(v);
+
+    svm_guest_osvw_init(v);
+
     return 0;
 }

@@ -1044,6 +1114,27 @@ static void svm_init_erratum_383(struct
     }
 }

+static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read)
+{
+    uint eax, ebx, ecx, edx;
+
+    /* Guest OSVW support */
+    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
+        return -1;
+
+    if ( read )
+    {
+        if (msr == MSR_AMD_OSVW_ID_LENGTH)
+            *val = v->arch.hvm_svm.osvw.length;
+        else
+            *val = v->arch.hvm_svm.osvw.status;
+    }
+    /* Writes are ignored */
+
+    return 0;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -1094,6 +1185,9 @@ static int svm_cpu_up(void)
     }
 #endif

+    /* Initialize OSVW bits to be used by guests */
+    svm_host_osvw_init();
+
     return 0;
 }

@@ -1104,6 +1198,8 @@ struct hvm_function_table * __init start
     if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
         return NULL;

+    svm_host_osvw_reset();
+
     if ( svm_cpu_up() )
     {
         printk("SVM: failed to initialise.\n");
@@ -1388,6 +1484,13 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;

+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, msr_content, 1);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_rdmsr(v, msr, msr_content);
         if ( ret < 0 )
@@ -1512,6 +1615,13 @@ static int svm_msr_write_intercept(unsig
          */
         break;

+    case MSR_AMD_OSVW_ID_LENGTH:
+    case MSR_AMD_OSVW_STATUS:
+        ret = svm_handle_osvw(v, msr, &msr_content, 0);
+        if ( ret < 0 )
+            goto gpf;
+        break;
+
     default:
         ret = nsvm_wrmsr(v, msr, msr_content);
         if ( ret < 0 )
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
     info->error = 0;
     info->cpu = cpumask_first(&cpu_online_map);

+    if ( microcode_ops->start_update )
+    {
+        ret = microcode_ops->start_update();
+        if ( ret != 0 )
+        {
+            xfree(info);
+            return ret;
+        }
+    }
+
     return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
 }

@@ -240,6 +250,12 @@ static int __init microcode_init(void)
     if ( !data )
         return -ENOMEM;

+    if ( microcode_ops->start_update && microcode_ops->start_update() != 0 )
+    {
+        ucode_mod_map(NULL);
+        return 0;
+    }
+
     softirq_tasklet_init(&tasklet, _do_microcode_update, (unsigned long)data);

     for_each_online_cpu ( cpu )
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -25,6 +25,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/microcode.h>
+#include <asm/hvm/svm/svm.h>

 struct equiv_cpu_entry {
     uint32_t installed_cpu;
@@ -71,6 +72,7 @@ struct mpbhdr {
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);

+/* See comment in start_update() for cases when this routine fails */
 static int collect_cpu_info(int cpu, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu];
@@ -287,7 +289,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! Wrong "
                "microcode patch file magic\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }

     mc_amd = xmalloc(struct microcode_amd);
@@ -295,7 +298,8 @@ static int cpu_request_microcode(int cpu
     {
         printk(KERN_ERR "microcode: error! "
                "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
+        error = -ENOMEM;
+        goto out;
     }

     error = install_equiv_cpu_table(mc_amd, buf, &offset);
@@ -303,7 +307,8 @@ static int cpu_request_microcode(int cpu
     {
         xfree(mc_amd);
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
-        return -EINVAL;
+        error = -EINVAL;
+        goto out;
     }

     mc_old = uci->mc.mc_amd;
@@ -337,13 +342,19 @@ static int cpu_request_microcode(int cpu
     /* On success keep the microcode patch for
      * re-apply on resume.
      */
-    if (error == 1)
+    if ( error == 1 )
     {
         xfree(mc_old);
-        return 0;
+        error = 0;
+    }
+    else
+    {
+        xfree(mc_amd);
+        uci->mc.mc_amd = mc_old;
     }
-    xfree(mc_amd);
-    uci->mc.mc_amd = mc_old;
+
+ out:
+    svm_host_osvw_init();

     return error;
 }
@@ -395,11 +406,28 @@ err1:
     return -ENOMEM;
 }

+static int start_update(void)
+{
+    /*
+     * We assume here that svm_host_osvw_init() will be called on each cpu (from
+     * cpu_request_microcode()).
+     *
+     * Note that if collect_cpu_info() returns an error then
+     * cpu_request_microcode() will not be invoked thus leaving OSVW bits not
+     * updated. Currently though collect_cpu_info() will not fail on processors
+     * supporting OSVW so we will not deal with this possibility.
+     */
+    svm_host_osvw_reset();
+
+    return 0;
+}
+
 static const struct microcode_ops microcode_amd_ops = {
     .microcode_resume_match = microcode_resume_match,
     .cpu_request_microcode = cpu_request_microcode,
     .collect_cpu_info = collect_cpu_info,
     .apply_microcode = apply_microcode,
+    .start_update = start_update,
 };

 static __init int microcode_init_amd(void)
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;

         guest_from_compat_handle(data, op->u.microcode.data);
+
+        /*
+         * alloc_vpcu() will access data which is modified during
+         * microcode update
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
+                goto out;
+            }
+
         ret = microcode_update(data, op->u.microcode.length);
+        spin_unlock(&vcpu_alloc_lock);
     }
     break;

--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
 #include <xsm/xsm.h>

 static DEFINE_SPINLOCK(domctl_lock);
+DEFINE_SPINLOCK(vcpu_alloc_lock);

 int cpumask_to_xenctl_cpumap(
     struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
@@ -506,6 +507,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         /* Needed, for example, to ensure writable p.t. state is synced. */
         domain_pause(d);

+        /*
+         * Certain operations (e.g. CPU microcode updates) modify data which is
+         * used during VCPU allocation/initialization
+         */
+        while ( !spin_trylock(&vcpu_alloc_lock) )
+            if ( hypercall_preempt_check() )
+            {
+                ret = hypercall_create_continuation(
+                    __HYPERVISOR_domctl, "h", u_domctl);
+                goto maxvcpu_out_novcpulock;
+            }
+
         /* We cannot reduce maximum VCPUs. */
         ret = -EINVAL;
         if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
@@ -555,6 +568,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         ret = 0;

     maxvcpu_out:
+        spin_unlock(&vcpu_alloc_lock);
+
+    maxvcpu_out_novcpulock:
         domain_unpause(d);
         rcu_unlock_domain(d);
     }
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
                               ~TSC_RATIO_RSVD_BITS )
 #define vcpu_tsc_ratio(v)     TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)

+extern void svm_host_osvw_reset(void);
+extern void svm_host_osvw_init(void);
+
 #endif /* __ASM_X86_HVM_SVM_H__ */
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -516,6 +516,12 @@ struct arch_svm_struct {

     /* AMD lightweight profiling MSR */
     uint64_t guest_lwp_cfg;
+
+    /* OSVW MSRs */
+    struct {
+        u64 length;
+        u64 status;
+    } osvw;
 };

 struct vmcb_struct *alloc_vmcb(void);
--- a/xen/include/asm-x86/microcode.h
+++ b/xen/include/asm-x86/microcode.h
@@ -11,6 +11,7 @@ struct microcode_ops {
     int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
     int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
     int (*apply_microcode)(int cpu);
+    int (*start_update)(void);
 };

 struct cpu_signature {
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain

 void arch_vcpu_reset(struct vcpu *v);

+extern spinlock_t vcpu_alloc_lock;
 bool_t domctl_lock_acquire(void);
 void domctl_lock_release(void);
Konrad Rzeszutek Wilk
2012-Feb-07 17:05 UTC
Re: [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests
On Tue, Feb 07, 2012 at 08:26:57AM +0000, Jan Beulich wrote:
> >>> On 06.02.12 at 18:47, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> > On Mon, Feb 06, 2012 at 06:39:25PM +0100, Boris Ostrovsky wrote:
> >> # HG changeset patch
> >> # User Boris Ostrovsky <boris.ostrovsky@amd.com>
> >> # Date 1328549858 -3600
> >> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
> >> # Parent e2722b24dc0962de37215320b05d1bb7c4c42864
> >> x86/AMD: Add support for AMD's OSVW feature in guests.
> >>
> >> In some cases guests should not provide workarounds for errata even when the
> >> physical processor is affected. For example, because of erratum 400 on
> > family
> >> 10h processors a Linux guest will read an MSR (resulting in VMEXIT) before
> >> going to idle in order to avoid getting stuck in a non-C0 state. This is not
> >
> > What about fixing the Linux guest to actually not do this? I presume you
> > have encountered this with HVM guests - would it be possible to use
> >
> > set_pm_idle_to_default(default_idle) in the HVM bootup path, say in
> > 'xen_hvm_guest_init' ??
>
> Are you saying this would work even with CONFIG_XEN not set
> (which I doubt, but is a valid case to consider)?

Ugh. Not in that case.

>
> Also, this sort of adjustment is exactly what is being done better
> in the hypervisor, as it'll address the problem at hand for all guests,
> no matter what kernel (version or kind) they run.

Right. I concur that it should also be done in the hypervisor. But
earlier versions of the hypervisor are not getting this patch. So
fixing it in the kernel could cover some issues if one is using Xen 4.0
for example. Hm, it seems that I actually neglected to say that and made
it sound like it should be either in the hypervisor or in the kernel. I
meant both.

>
> Jan