Elena Ufimtseva
2013-Aug-27 08:52 UTC
[PATCH RFC 0/2] linux/vnuma: vnuma topology PV guest
Enables PV guest to discover NUMA topology provided by XEN
and inits this topology on boot.
Xen provides numa memblocks aligned for guest, distance table,
cpu setup per node.
On failure defaults to dummy linux numa init.

TODO:
Change XENMEM hypercall subop to SYSCTL.

Elena Ufimtseva (2):
  vNUMA PV Guest Linux support
  Enables NUMA for PV guest

 arch/x86/include/asm/xen/vnuma-xen.h |   36 +++++++++++
 arch/x86/mm/numa.c                   |    8 +++
 arch/x86/xen/enlighten.c             |  115 ++++++++++++++++++++++++++++++++++
 arch/x86/xen/setup.c                 |    2 +-
 include/xen/interface/memory.h       |    1 +
 5 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h

-- 
1.7.10.4
Elena Ufimtseva
2013-Aug-27 08:52 UTC
[PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
Uses subop hypercall to request XEN about vnuma topology. Sets the memory blocks (aligned by XEN), cpus, distance table on boot. NUMA support should be compiled in kernel. NUMA support should be compiled in kernel. Memory blocks are aligned by xen. Xen is aware of guest initial RAM layout. If the memory blocks are incorrect, call for default linux numa dummy init. Requires XEN with vNUMA support. Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376 TODO: Change subop from XENMEM to SYSCTL. Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com> --- arch/x86/include/asm/xen/vnuma-xen.h | 36 +++++++++++ arch/x86/mm/numa.c | 8 +++ arch/x86/xen/enlighten.c | 115 ++++++++++++++++++++++++++++++++++ include/xen/interface/memory.h | 1 + 4 files changed, 160 insertions(+) create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h diff --git a/arch/x86/include/asm/xen/vnuma-xen.h b/arch/x86/include/asm/xen/vnuma-xen.h new file mode 100644 index 0000000..73c1bde --- /dev/null +++ b/arch/x86/include/asm/xen/vnuma-xen.h @@ -0,0 +1,36 @@ +#ifndef _ASM_X86_VNUMA_XEN_H +#define _ASM_X86_VNUMA_XEN_H + +#ifdef CONFIG_XEN +int __initdata xen_numa_init(void); + +struct vnuma_memblk { + uint64_t start, end; +}; +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memblk); + +struct vnuma_topology_info { + domid_t domid; + uint16_t nr_nodes; + GUEST_HANDLE(vnuma_memblk) vmemblk; + GUEST_HANDLE(int) vdistance; + GUEST_HANDLE(int) cpu_to_node; + GUEST_HANDLE(int) vnode_to_pnode; +}; +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); + +struct xen_domctl { + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ + domid_t domain; + union { + struct vnuma_topology_info vnuma; + } u; +}; +typedef struct xen_domctl xen_domctl_t; +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_t); + +#else +int __init xen_numa_init(void) {} +#endif + +#endif /* _ASM_X86_VNUMA_XEN_H */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8bf93ba..3ec7855 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -20,6 +20,10 @@ #include "numa_internal.h" +#ifdef CONFIG_XEN +#include "asm/xen/vnuma-xen.h" +#endif + int __initdata numa_off; nodemask_t numa_nodes_parsed __initdata; @@ -621,6 +625,10 @@ static int __init dummy_numa_init(void) void __init x86_numa_init(void) { if (!numa_off) { +#ifdef CONFIG_XEN + if (xen_pv_domain() && !numa_init(xen_numa_init)) + return; +#endif #ifdef CONFIG_X86_NUMAQ if (!numa_init(numaq_numa_init)) return; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 193097e..4658e9d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,7 @@ #include <linux/memblock.h> #include <linux/edd.h> +#include <asm/numa.h> #include <xen/xen.h> #include <xen/events.h> #include <xen/interface/xen.h> @@ -69,6 +70,7 @@ #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/pat.h> +#include <asm/xen/vnuma-xen.h> #ifdef CONFIG_ACPI #include <linux/acpi.h> @@ -1750,4 +1752,117 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { .x2apic_available = xen_x2apic_para_available, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); + +/* Xen PV NUMA topology initialization */ +int __initdata xen_numa_init(void) +{ + struct vnuma_memblk *vblk; + struct vnuma_topology_info numa_topo; + uint64_t size, start, end; + int i, j, cpu, rc; + u64 phys, physd, physc; + u64 new_end_pfn; + size_t mem_size; + + int *vdistance, *cpu_to_node; + unsigned long dist_size, cpu_to_node_size; + numa_topo.domid = DOMID_SELF; + rc = -EINVAL; + /* 
block mem reservation for memblks */ + mem_size = num_possible_cpus() * sizeof(struct vnuma_memblk); + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), mem_size, PAGE_SIZE); + if (!phys) { + pr_info("vNUMA: can''t allocate memory for membloks size %zu\n", mem_size); + rc = -ENOMEM; + goto errout; + } + if (memblock_reserve(phys, mem_size) < 0) { + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", mem_size); + rc = -ENOMEM; + goto errout; + } + vblk = __va(phys); + set_xen_guest_handle(numa_topo.vmemblk, vblk); + + dist_size = num_possible_cpus() * num_possible_cpus() * sizeof(*numa_topo.vdistance); + physd = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), dist_size, PAGE_SIZE); + if (!physd) { + pr_info("vNUMA: can''t allocate memory for distance size %zu\n", dist_size); + rc = -ENOMEM; + goto errout; + } + if (memblock_reserve(physd, dist_size) < 0) { + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", dist_size); + rc = -ENOMEM; + goto errout; + } + vdistance = __va(physd); + set_xen_guest_handle(numa_topo.vdistance, (int *)vdistance); + + /* allocate memblock for vcpu to node mask of max size */ + cpu_to_node_size = num_possible_cpus() * sizeof(*numa_topo.cpu_to_node); + physc = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), cpu_to_node_size, PAGE_SIZE); + if (!physc) { + pr_info("vNUMA: can''t allocate memory for distance size %zu\n", cpu_to_node_size); + rc = -ENOMEM; + goto errout; + } + if (memblock_reserve(physc, cpu_to_node_size) < 0) { + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu\n", cpu_to_node_size); + goto errout; + } + cpu_to_node = __va(physc); + set_xen_guest_handle(numa_topo.cpu_to_node, (int *)cpu_to_node); + if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0) + goto errout; + if (numa_topo.nr_nodes == 0) + /* Pass to DUMMY numa */ + goto errout; + if (numa_topo.nr_nodes > num_possible_cpus()) { + pr_info("vNUMA: Node without cpu is not supported, nodes = %d\n", numa_topo.nr_nodes); + goto errout; + } + new_end_pfn = 0; + /* We have sizes in pages received from hypervisor, no holes and ordered */ + for (i = 0; i < numa_topo.nr_nodes; i++) { + start = vblk[i].start; + end = vblk[i].end; + size = end - start; + pr_info("VNMA blk[%d] size is %#010Lx start = %#010Lx end = %#010Lx\n", + i, size, start, end); + if (numa_add_memblk(i, start, end)) { + pr_info("vNUMA: Could not add memblk[%d]\n", i); + rc = -EAGAIN; + goto errout; + } + node_set(i, numa_nodes_parsed); + } + setup_nr_node_ids(); + /* Setting the cpu, apicid to node */ + for_each_cpu(cpu, cpu_possible_mask) { + pr_info("Setting up vcpu[%d] to node[%d]\n", cpu, cpu_to_node[cpu]); + set_apicid_to_node(cpu, cpu_to_node[cpu]); + numa_set_node(cpu, cpu_to_node[cpu]); + __apicid_to_node[cpu] = cpu_to_node[cpu]; + cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]); + } + setup_nr_node_ids(); + rc = 0; + for (i = 0; i < numa_topo.nr_nodes; i++) + for (j = 0; j < numa_topo.nr_nodes; j++) { + numa_set_distance(i, j, *(vdistance + j*numa_topo.nr_nodes + i)); + pr_info("distance %d <-> %d = %d\n", i, j, *(vdistance + j*numa_topo.nr_nodes + i)); + } +errout: + if (phys) + memblock_free(__pa(phys), mem_size); + if (physd) + memblock_free(__pa(physd), dist_size); + if (physc) + memblock_free(__pa(physc), cpu_to_node_size); + if (rc) + pr_debug("XEN: hypercall failed to get vNUMA topology.\n"); + return rc; +} + #endif diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index 2ecfe4f..16b8d87 100644 --- 
a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -263,4 +263,5 @@ struct xen_remove_from_physmap { }; DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); +#define XENMEM_get_vnuma_info 25 #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- 1.7.10.4
Elena Ufimtseva
2013-Aug-27 08:53 UTC
[PATCH RFC 2/2] linux/vnuma: Enables NUMA support for PV guest
Enables NUMA support for PV guest and uses vNUMA
interface to discover topology provided by XEN.

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 arch/x86/xen/setup.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 056d11f..7e0c93b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -561,6 +561,6 @@ void __init xen_arch_setup(void)
 	WARN_ON(xen_set_default_idle());
 	fiddle_vdso();
 #ifdef CONFIG_NUMA
-	numa_off = 1;
+	numa_off = 0;
 #endif
 }
-- 
1.7.10.4
David Vrabel
2013-Aug-27 13:35 UTC
Re: [PATCH RFC 2/2] linux/vnuma: Enables NUMA support for PV guest
On 27/08/13 09:53, Elena Ufimtseva wrote:
> Enables NUMA support for PV guest and uses vNUMA
> interface to discover topology provided by XEN.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> ---
>  arch/x86/xen/setup.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> index 056d11f..7e0c93b 100644
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -561,6 +561,6 @@ void __init xen_arch_setup(void)
>  	WARN_ON(xen_set_default_idle());
>  	fiddle_vdso();
>  #ifdef CONFIG_NUMA
> -	numa_off = 1;
> +	numa_off = 0;
>  #endif
>  }

Hmm. This is either safe to turn on now or it will break on hosts
without vNUMA support as it will cause a fallback to try all the
hardware specific NUMA initialization.

Which is it?

David
David Vrabel
2013-Aug-27 13:48 UTC
Re: [PATCH RFC 0/2] linux/vnuma: vnuma topology PV guest
On 27/08/13 09:52, Elena Ufimtseva wrote:
> Enables PV guest to discover NUMA topology provided by XEN
> and inits this topology on boot.
> Xen provides numa memblocks aligned for guest, distance table,
> cpu setup per node.
> On failure defaults to dummy linux numa init.
>
> TODO:
> Change XENMEM hypercall subop to SYSCTL.

Please Cc the Xen maintainers in future: Konrad, Boris and myself.

You will also need to Cc the x86 maintainers when it's no longer an RFC.

David

>
> Elena Ufimtseva (2):
>   vNUMA PV Guest Linux support
>   Enables NUMA for PV guest
>
>  arch/x86/include/asm/xen/vnuma-xen.h |   36 +++++++++++
>  arch/x86/mm/numa.c                   |    8 +++
>  arch/x86/xen/enlighten.c             |  115 ++++++++++++++++++++++++++++++++++
>  arch/x86/xen/setup.c                 |    2 +-
>  include/xen/interface/memory.h       |    1 +
>  5 files changed, 161 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h
>
David Vrabel
2013-Aug-27 13:52 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On 27/08/13 09:52, Elena Ufimtseva wrote:> Uses subop hypercall to request XEN about vnuma topology. > Sets the memory blocks (aligned by XEN), cpus, distance table > on boot. NUMA support should be compiled in kernel. > > NUMA support should be compiled in kernel. > > Memory blocks are aligned by xen. Xen is aware of guest initial > RAM layout. > If the memory blocks are incorrect, call for default linux numa > dummy init. > > Requires XEN with vNUMA support. > > Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git > latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376 > > TODO: > Change subop from XENMEM to SYSCTL.XENMEM subop seems right here.> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com> > --- > arch/x86/include/asm/xen/vnuma-xen.h | 36 +++++++++++ > arch/x86/mm/numa.c | 8 +++ > arch/x86/xen/enlighten.c | 115 ++++++++++++++++++++++++++++++++++ > include/xen/interface/memory.h | 1 + > 4 files changed, 160 insertions(+) > create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h > > diff --git a/arch/x86/include/asm/xen/vnuma-xen.h b/arch/x86/include/asm/xen/vnuma-xen.h > new file mode 100644 > index 0000000..73c1bde > --- /dev/null > +++ b/arch/x86/include/asm/xen/vnuma-xen.h > @@ -0,0 +1,36 @@ > +#ifndef _ASM_X86_VNUMA_XEN_H > +#define _ASM_X86_VNUMA_XEN_H > + > +#ifdef CONFIG_XEN > +int __initdata xen_numa_init(void); > + > +struct vnuma_memblk { > + uint64_t start, end; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memblk); > + > +struct vnuma_topology_info { > + domid_t domid; > + uint16_t nr_nodes; > + GUEST_HANDLE(vnuma_memblk) vmemblk; > + GUEST_HANDLE(int) vdistance; > + GUEST_HANDLE(int) cpu_to_node; > + GUEST_HANDLE(int) vnode_to_pnode; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); > + > +struct xen_domctl { > + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ > + domid_t domain; > + union { > + struct vnuma_topology_info vnuma; > + } u; > +}; > +typedef struct xen_domctl xen_domctl_t; > +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_t);Most of this is the public interface provided by Xen, this should be in include/xen/interface/memory.h. 
And only the xen_numa_init() prototype in this header.> + > +#else > +int __init xen_numa_init(void) {} > +#endif > + > +#endif /* _ASM_X86_VNUMA_XEN_H */ > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c > index 8bf93ba..3ec7855 100644 > --- a/arch/x86/mm/numa.c > +++ b/arch/x86/mm/numa.c > @@ -20,6 +20,10 @@#include <asm/xen/vnuma.h> here.> #include "numa_internal.h" > > +#ifdef CONFIG_XEN > +#include "asm/xen/vnuma-xen.h" > +#endifInstead of this.> + > int __initdata numa_off; > nodemask_t numa_nodes_parsed __initdata; > > @@ -621,6 +625,10 @@ static int __init dummy_numa_init(void) > void __init x86_numa_init(void) > { > if (!numa_off) { > +#ifdef CONFIG_XEN > + if (xen_pv_domain() && !numa_init(xen_numa_init)) > + return; > +#endifMove the test for xen_pv_domain() into xen_numa_init.> #ifdef CONFIG_X86_NUMAQ > if (!numa_init(numaq_numa_init)) > return; > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index 193097e..4658e9d 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -33,6 +33,7 @@ > #include <linux/memblock.h> > #include <linux/edd.h> > > +#include <asm/numa.h> > #include <xen/xen.h> > #include <xen/events.h> > #include <xen/interface/xen.h> > @@ -69,6 +70,7 @@ > #include <asm/mwait.h> > #include <asm/pci_x86.h> > #include <asm/pat.h> > +#include <asm/xen/vnuma-xen.h> > > #ifdef CONFIG_ACPI > #include <linux/acpi.h> > @@ -1750,4 +1752,117 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { > .x2apic_available = xen_x2apic_para_available, > }; > EXPORT_SYMBOL(x86_hyper_xen_hvm); > + > +/* Xen PV NUMA topology initialization */ > +int __initdata xen_numa_init(void)This function needs to more clearly separate the different parts. This could be by splitting it into several functions or by improving the comments and spacing. I''d also move it to a new file: arch/x86/xen/vnuma.c.> +{ > + struct vnuma_memblk *vblk; > + struct vnuma_topology_info numa_topo; > + uint64_t size, start, end; > + int i, j, cpu, rc; > + u64 phys, physd, physc; > + u64 new_end_pfn; > + size_t mem_size; > + > + int *vdistance, *cpu_to_node; > + unsigned long dist_size, cpu_to_node_size; > + numa_topo.domid = DOMID_SELF; > + rc = -EINVAL; > + /* block mem reservation for memblks */ > + mem_size = num_possible_cpus() * sizeof(struct vnuma_memblk); > + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), mem_size, PAGE_SIZE); > + if (!phys) { > + pr_info("vNUMA: can''t allocate memory for membloks size %zu\n", mem_size); > + rc = -ENOMEM; > + goto errout; > + } > + if (memblock_reserve(phys, mem_size) < 0) { > + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", mem_size); > + rc = -ENOMEM; > + goto errout; > + }Does mem_block_alloc() work here instead of the find_in_range/reserve combo? 
Similarly for the other allocations.> + vblk = __va(phys); > + set_xen_guest_handle(numa_topo.vmemblk, vblk);Please group all the memory allocations followed by grouping the initialization of the hypercall arguments.> + if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0) > + goto errout;ret = HYPERVISOR_memory_op(...); if (ret < 0) goto out;> + if (numa_topo.nr_nodes == 0) > + /* Pass to DUMMY numa */ > + goto errout; > + if (numa_topo.nr_nodes > num_possible_cpus()) { > + pr_info("vNUMA: Node without cpu is not supported, nodes = %d\n", numa_topo.nr_nodes); > + goto errout; > + }Is this a constraint of the Linux NUMA support?> + new_end_pfn = 0; > + /* We have sizes in pages received from hypervisor, no holes and ordered */How does this play with the relocate of frames done for dom0 during initial setup?> + for (i = 0; i < numa_topo.nr_nodes; i++) { > + start = vblk[i].start; > + end = vblk[i].end; > + size = end - start; > + pr_info("VNMA blk[%d] size is %#010Lx start = %#010Lx end = %#010Lx\n", > + i, size, start, end); > + if (numa_add_memblk(i, start, end)) { > + pr_info("vNUMA: Could not add memblk[%d]\n", i); > + rc = -EAGAIN; > + goto errout; > + } > + node_set(i, numa_nodes_parsed); > + } > + setup_nr_node_ids(); > + /* Setting the cpu, apicid to node */ > + for_each_cpu(cpu, cpu_possible_mask) { > + pr_info("Setting up vcpu[%d] to node[%d]\n", cpu, cpu_to_node[cpu]); > + set_apicid_to_node(cpu, cpu_to_node[cpu]); > + numa_set_node(cpu, cpu_to_node[cpu]); > + __apicid_to_node[cpu] = cpu_to_node[cpu]; > + cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]); > + } > + setup_nr_node_ids(); > + rc = 0; > + for (i = 0; i < numa_topo.nr_nodes; i++)Style: {} around this multi-line body.> + for (j = 0; j < numa_topo.nr_nodes; j++) { > + numa_set_distance(i, j, *(vdistance + j*numa_topo.nr_nodes + i)); > + pr_info("distance %d <-> %d = %d\n", i, j, *(vdistance + j*numa_topo.nr_nodes + i)); > + } > +errout:This isn''t (always) an error path, name this label "out".> + if (phys) > + memblock_free(__pa(phys), mem_size); > + if (physd) > + memblock_free(__pa(physd), dist_size); > + if (physc) > + memblock_free(__pa(physc), cpu_to_node_size); > + if (rc) > + pr_debug("XEN: hypercall failed to get vNUMA topology.\n");Remove this debug print, you already have prints for specific failures.> + return rc; > +} > + > #endifDavid
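For illustration only: a rough sketch of what David's memblock_alloc() suggestion could look like, assuming the 3.10-era memblock_alloc() that takes (size, align) and returns a physical address (it panics rather than returning 0 on failure, so the check below is only defensive). The vnuma_alloc() helper is hypothetical and not part of the posted patch.

/* One allocation helper instead of three find_in_range/reserve pairs. */
static void * __init vnuma_alloc(size_t size)
{
	phys_addr_t phys;

	phys = memblock_alloc(size, PAGE_SIZE);	/* <linux/memblock.h> */
	if (!phys)
		return NULL;
	return __va(phys);
}

	/* ... then in xen_numa_init(): */
	vblk = vnuma_alloc(mem_size);
	if (!vblk) {
		rc = -ENOMEM;
		goto out;
	}
	set_xen_guest_handle(numa_topo.vmemblk, vblk);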
Konrad Rzeszutek Wilk
2013-Aug-27 14:17 UTC
Re: [PATCH RFC 2/2] linux/vnuma: Enables NUMA support for PV guest
On Tue, Aug 27, 2013 at 04:53:00AM -0400, Elena Ufimtseva wrote:
> Enables NUMA support for PV guest and uses vNUMA
> interface to discover topology provided by XEN.

I think this should depend on a hypercall to figure out whether Xen
supports this mechanism ?

>
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> ---
>  arch/x86/xen/setup.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> index 056d11f..7e0c93b 100644
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -561,6 +561,6 @@ void __init xen_arch_setup(void)
>  	WARN_ON(xen_set_default_idle());
>  	fiddle_vdso();
>  #ifdef CONFIG_NUMA
> -	numa_off = 1;
> +	numa_off = 0;
>  #endif
>  }
> -- 
> 1.7.10.4
>
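One possible shape of the probe Konrad asks for, sketched under two assumptions that the series does not confirm: that an unimplemented XENMEM_ subop surfaces as -ENOSYS, and that a bare query with no buffers filled in is acceptable to Xen as a capability check. xen_vnuma_supported() is a hypothetical helper, not part of the posted patches.

static bool __init xen_vnuma_supported(void)
{
	struct vnuma_topology_info numa_topo = {
		.domid = DOMID_SELF,
	};

	/*
	 * A real probe would likely pass properly sized buffers or use a
	 * dedicated "query only" mode; this only checks for -ENOSYS.
	 */
	return HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) != -ENOSYS;
}

	/* ... and in xen_arch_setup(): */
#ifdef CONFIG_NUMA
	numa_off = xen_vnuma_supported() ? 0 : 1;
#endif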
Konrad Rzeszutek Wilk
2013-Aug-27 14:33 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:> Uses subop hypercall to request XEN about vnuma topology. > Sets the memory blocks (aligned by XEN), cpus, distance table > on boot. NUMA support should be compiled in kernel.Needs a bit more details (which I am sure you will add in the next postings). Mostly: - How E820 and NUMA information mesh - What the hypercall provides - What changeset in the Xen provides this hypercall -> > NUMA support should be compiled in kernel. > > Memory blocks are aligned by xen. Xen is aware of guest initial > RAM layout. > If the memory blocks are incorrect, call for default linux numa > dummy init.Unfortunatly that won''t happen if you boot this under dom0. It will find on some AMD machines the AMD Northbridge and try to scan that. And blow up. If you look at the git commit that did the ''numa = 0'' you will see the backstory. I think part of enablement of '' numa = 1'' is to wrap it with if (xen_initial_domain() && xen_supports_this_hypercall()) numa = 1; in the #2 patch.> > Requires XEN with vNUMA support. > > Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git > latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376 > > TODO: > Change subop from XENMEM to SYSCTL. > > Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com> > --- > arch/x86/include/asm/xen/vnuma-xen.h | 36 +++++++++++ > arch/x86/mm/numa.c | 8 +++ > arch/x86/xen/enlighten.c | 115 ++++++++++++++++++++++++++++++++++ > include/xen/interface/memory.h | 1 + > 4 files changed, 160 insertions(+) > create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h > > diff --git a/arch/x86/include/asm/xen/vnuma-xen.h b/arch/x86/include/asm/xen/vnuma-xen.h > new file mode 100644 > index 0000000..73c1bde > --- /dev/null > +++ b/arch/x86/include/asm/xen/vnuma-xen.h > @@ -0,0 +1,36 @@ > +#ifndef _ASM_X86_VNUMA_XEN_H > +#define _ASM_X86_VNUMA_XEN_H > + > +#ifdef CONFIG_XEN > +int __initdata xen_numa_init(void); > + > +struct vnuma_memblk { > + uint64_t start, end; > +}; > +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memblk); > + > +struct vnuma_topology_info { > + domid_t domid; > + uint16_t nr_nodes; > + GUEST_HANDLE(vnuma_memblk) vmemblk; > + GUEST_HANDLE(int) vdistance; > + GUEST_HANDLE(int) cpu_to_node; > + GUEST_HANDLE(int) vnode_to_pnode;A comment explaining what those values are meant to have would be good. Perhaps with a simple example.> +};The structure is not very 32-bit friendly. Would it be possible to add some padding so that the size of this structure is the same on 32-bit and 64-bit please? Perhaps: domid_t domid; uint16_t nr_nodes; uint32_t _pad; GUEST_HANDLE(vnuma_memblk) vmemblk; GUEST_HANDLE(int) vdistance; GUEST_HANDLE(int) cpu_to_node; GUEST_HANDLE(int) vnode_to_pnode; That should make the offsets be the same on both 32 and 64-bit I think.> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); > + > +struct xen_domctl { > + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ > + domid_t domain; > + union { > + struct vnuma_topology_info vnuma; > + } u;Move that ''u'' one tab back please.> +}; > +typedef struct xen_domctl xen_domctl_t;Ewww. 
No typdefs in the Linux code please.> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_t); > + > +#else > +int __init xen_numa_init(void) {} > +#endif > + > +#endif /* _ASM_X86_VNUMA_XEN_H */ > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c > index 8bf93ba..3ec7855 100644 > --- a/arch/x86/mm/numa.c > +++ b/arch/x86/mm/numa.c > @@ -20,6 +20,10 @@ > > #include "numa_internal.h" > > +#ifdef CONFIG_XEN > +#include "asm/xen/vnuma-xen.h" > +#endif > + > int __initdata numa_off; > nodemask_t numa_nodes_parsed __initdata; > > @@ -621,6 +625,10 @@ static int __init dummy_numa_init(void) > void __init x86_numa_init(void) > { > if (!numa_off) { > +#ifdef CONFIG_XEN > + if (xen_pv_domain() && !numa_init(xen_numa_init))I would remove the xen_pv_domain() check and add that in enlighten.c code.> + return; > +#endif > #ifdef CONFIG_X86_NUMAQ > if (!numa_init(numaq_numa_init)) > return; > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index 193097e..4658e9d 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -33,6 +33,7 @@ > #include <linux/memblock.h> > #include <linux/edd.h> > > +#include <asm/numa.h> > #include <xen/xen.h> > #include <xen/events.h> > #include <xen/interface/xen.h> > @@ -69,6 +70,7 @@ > #include <asm/mwait.h> > #include <asm/pci_x86.h> > #include <asm/pat.h> > +#include <asm/xen/vnuma-xen.h> > > #ifdef CONFIG_ACPI > #include <linux/acpi.h> > @@ -1750,4 +1752,117 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { > .x2apic_available = xen_x2apic_para_available, > }; > EXPORT_SYMBOL(x86_hyper_xen_hvm); > + > +/* Xen PV NUMA topology initialization */ > +int __initdata xen_numa_init(void) > +{ > + struct vnuma_memblk *vblk; > + struct vnuma_topology_info numa_topo; > + uint64_t size, start, end; > + int i, j, cpu, rc; > + u64 phys, physd, physc; > + u64 new_end_pfn; > + size_t mem_size; > + > + int *vdistance, *cpu_to_node; > + unsigned long dist_size, cpu_to_node_size;You need an extra line here;> + numa_topo.domid = DOMID_SELF; > + rc = -EINVAL;Both of those can be part of the decleration of variables above. 
Like so: int rc = -EINVAL; struct vnuma_topology_info numa_topo = { .domid = DOMID_SELF; };> + /* block mem reservation for memblks */ > + mem_size = num_possible_cpus() * sizeof(struct vnuma_memblk); > + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), mem_size, PAGE_SIZE); > + if (!phys) { > + pr_info("vNUMA: can''t allocate memory for membloks size %zu\n", mem_size); > + rc = -ENOMEM; > + goto errout; > + } > + if (memblock_reserve(phys, mem_size) < 0) { > + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", mem_size); > + rc = -ENOMEM; > + goto errout; > + } > + vblk = __va(phys); > + set_xen_guest_handle(numa_topo.vmemblk, vblk); > + > + dist_size = num_possible_cpus() * num_possible_cpus() * sizeof(*numa_topo.vdistance); > + physd = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), dist_size, PAGE_SIZE); > + if (!physd) { > + pr_info("vNUMA: can''t allocate memory for distance size %zu\n", dist_size); > + rc = -ENOMEM; > + goto errout; > + } > + if (memblock_reserve(physd, dist_size) < 0) { > + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", dist_size); > + rc = -ENOMEM; > + goto errout; > + } > + vdistance = __va(physd); > + set_xen_guest_handle(numa_topo.vdistance, (int *)vdistance); > + > + /* allocate memblock for vcpu to node mask of max size */ > + cpu_to_node_size = num_possible_cpus() * sizeof(*numa_topo.cpu_to_node); > + physc = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), cpu_to_node_size, PAGE_SIZE); > + if (!physc) { > + pr_info("vNUMA: can''t allocate memory for distance size %zu\n", cpu_to_node_size); > + rc = -ENOMEM; > + goto errout; > + } > + if (memblock_reserve(physc, cpu_to_node_size) < 0) { > + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu\n", cpu_to_node_size); > + goto errout; > + } > + cpu_to_node = __va(physc); > + set_xen_guest_handle(numa_topo.cpu_to_node, (int *)cpu_to_node); > + if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0)I think you need to do rc = HYPERVISOR... and then if you get rc = -ENOSYS (so not implemented) change the rc to zero.> + goto errout; > + if (numa_topo.nr_nodes == 0) > + /* Pass to DUMMY numa */Also change rc to zero.> + goto errout; > + if (numa_topo.nr_nodes > num_possible_cpus()) { > + pr_info("vNUMA: Node without cpu is not supported, nodes = %d\n", numa_topo.nr_nodes); > + goto errout; > + } > + new_end_pfn = 0; > + /* We have sizes in pages received from hypervisor, no holes and ordered */From toolstack you mean. It is the one setting it up. And I hope it sets it up based on the E820 it has constructed as well? Otherwise it would be a bit awkward if those two were different.> + for (i = 0; i < numa_topo.nr_nodes; i++) { > + start = vblk[i].start; > + end = vblk[i].end; > + size = end - start; > + pr_info("VNMA blk[%d] size is %#010Lx start = %#010Lx end = %#010Lx\n", > + i, size, start, end);pr_debug perhaps?> + if (numa_add_memblk(i, start, end)) { > + pr_info("vNUMA: Could not add memblk[%d]\n", i); > + rc = -EAGAIN;Can you unroll the NUMA topology if this fails? 
Or is that not a problem?> + goto errout; > + } > + node_set(i, numa_nodes_parsed); > + } > + setup_nr_node_ids(); > + /* Setting the cpu, apicid to node */ > + for_each_cpu(cpu, cpu_possible_mask) { > + pr_info("Setting up vcpu[%d] to node[%d]\n", cpu, cpu_to_node[cpu]);pr_debug> + set_apicid_to_node(cpu, cpu_to_node[cpu]); > + numa_set_node(cpu, cpu_to_node[cpu]); > + __apicid_to_node[cpu] = cpu_to_node[cpu]; > + cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]); > + } > + setup_nr_node_ids();How come this is being called twice? It should have a comment explaining this extra call.> + rc = 0; > + for (i = 0; i < numa_topo.nr_nodes; i++) > + for (j = 0; j < numa_topo.nr_nodes; j++) { > + numa_set_distance(i, j, *(vdistance + j*numa_topo.nr_nodes + i));You could simply this by just doing: int idx = (j * numa_topo.nr_nodes) + i; ... *(vdistance + idx));> + pr_info("distance %d <-> %d = %d\n", i, j, *(vdistance + j*numa_topo.nr_nodes + i));pr_debug> + } > +errout: > + if (phys) > + memblock_free(__pa(phys), mem_size); > + if (physd) > + memblock_free(__pa(physd), dist_size); > + if (physc) > + memblock_free(__pa(physc), cpu_to_node_size); > + if (rc) > + pr_debug("XEN: hypercall failed to get vNUMA topology.\n");And include the ''rc'' value in the output please.> + return rc; > +} > + > #endif > diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h > index 2ecfe4f..16b8d87 100644 > --- a/include/xen/interface/memory.h > +++ b/include/xen/interface/memory.h > @@ -263,4 +263,5 @@ struct xen_remove_from_physmap { > }; > DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); > > +#define XENMEM_get_vnuma_info 25 > #endif /* __XEN_PUBLIC_MEMORY_H__ */ > -- > 1.7.10.4 >
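Folding a few of the mechanical suggestions above together (the explicit padding, the declaration-time initializer and a flat index into the distance matrix), the relevant fragments might end up looking like the sketch below. This is a reconstruction of the review comments, not code from the series.

/* Padded so the handle offsets match between 32- and 64-bit guests. */
struct vnuma_topology_info {
	domid_t domid;
	uint16_t nr_nodes;
	uint32_t _pad;
	GUEST_HANDLE(vnuma_memblk) vmemblk;
	GUEST_HANDLE(int) vdistance;
	GUEST_HANDLE(int) cpu_to_node;
	GUEST_HANDLE(int) vnode_to_pnode;
};

	/* In xen_numa_init(): initialize at declaration time. */
	int rc = -EINVAL;
	struct vnuma_topology_info numa_topo = {
		.domid = DOMID_SELF,
	};

	/* ... */

	/* Distance table walk with a flat index instead of pointer arithmetic. */
	for (i = 0; i < numa_topo.nr_nodes; i++) {
		for (j = 0; j < numa_topo.nr_nodes; j++) {
			int idx = (j * numa_topo.nr_nodes) + i;

			numa_set_distance(i, j, vdistance[idx]);
			pr_debug("distance %d <-> %d = %d\n", i, j, vdistance[idx]);
		}
	}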
Konrad Rzeszutek Wilk
2013-Aug-27 15:36 UTC
Re: [PATCH RFC 2/2] linux/vnuma: Enables NUMA support for PV guest
On Tue, Aug 27, 2013 at 02:35:18PM +0100, David Vrabel wrote:
> On 27/08/13 09:53, Elena Ufimtseva wrote:
> > Enables NUMA support for PV guest and uses vNUMA
> > interface to discover topology provided by XEN.
> >
> > Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> > ---
> >  arch/x86/xen/setup.c |    2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> > index 056d11f..7e0c93b 100644
> > --- a/arch/x86/xen/setup.c
> > +++ b/arch/x86/xen/setup.c
> > @@ -561,6 +561,6 @@ void __init xen_arch_setup(void)
> >  	WARN_ON(xen_set_default_idle());
> >  	fiddle_vdso();
> >  #ifdef CONFIG_NUMA
> > -	numa_off = 1;
> > +	numa_off = 0;
> >  #endif
> >  }
>
> Hmm. This is either safe to turn on now or it will break on hosts
> without vNUMA support as it will cause a fallback to try all the
> hardware specific NUMA initialization.
>
> Which is it?

It will blow up when booting as dom0 on certain AMD machines.

>
> David
Matt Wilson
2013-Aug-28 01:27 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:
> Uses subop hypercall to request XEN about vnuma topology.
> Sets the memory blocks (aligned by XEN), cpus, distance table
> on boot. NUMA support should be compiled in kernel.

Are we *really sure* that we want to go this route for PV vNUMA?
Couldn't we build just enough(tm) of the ACPI tables to express the
NUMA topology when constructing the domain? That's what we do for the
e820 map.

--msw

> NUMA support should be compiled in kernel.
>
> Memory blocks are aligned by xen. Xen is aware of guest initial
> RAM layout.
> If the memory blocks are incorrect, call for default linux numa
> dummy init.
>
> Requires XEN with vNUMA support.
>
> Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git
> latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376
Matt Wilson
2013-Aug-28 01:37 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Tue, Aug 27, 2013 at 06:27:15PM -0700, Matt Wilson wrote:
> On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:
> > Uses subop hypercall to request XEN about vnuma topology.
> > Sets the memory blocks (aligned by XEN), cpus, distance table
> > on boot. NUMA support should be compiled in kernel.
>
> Are we *really sure* that we want to go this route for PV vNUMA?
> Couldn't we build just enough(tm) of the ACPI tables to express the
> NUMA topology when constructing the domain? That's what we do for the
> e820 map.

Ignore me somewhat, since the e820 information is retrieved via
hypercall similar to what you're proposing.

Still, if there's some way that we can reuse existing Linux code
rather than bolting on a completely parallel mechanism to set this up
under PV I think it'd be better.

--msw
Dario Faggioli
2013-Aug-28 13:08 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On mar, 2013-08-27 at 18:37 -0700, Matt Wilson wrote:
> On Tue, Aug 27, 2013 at 06:27:15PM -0700, Matt Wilson wrote:
> > On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:
> > > Uses subop hypercall to request XEN about vnuma topology.
> > > Sets the memory blocks (aligned by XEN), cpus, distance table
> > > on boot. NUMA support should be compiled in kernel.
> >
> > Are we *really sure* that we want to go this route for PV vNUMA?
> > Couldn't we build just enough(tm) of the ACPI tables to express the
> > NUMA topology when constructing the domain? That's what we do for the
> > e820 map.
>
> Ignore me somewhat, since the e820 information is retrieved via
> hypercall similar to what you're proposing.
>
:-)

> Still, if there's some way that we can reuse existing Linux code
> rather than bolting on a completely parallel mechanism to set this up
> under PV I think it'd be better.
>
Well, it looks to me that Elena is reusing quite a bit of it, isn't she?
All she's providing is a new initialization function ( xen_numa_init()
), as it is happening already for ACPI NUMA, NUMAQ, and other NUMA
implementations.

In practice, while ACPI based NUMA code parses the ACPI tables in
acpi_numa_init(), PV vNUMA parses the information coming from an
hypercall xen_numa_init(). From that point on, Linux that steps-in and
do everything else "as usual".

Isn't that enough sharing?

Thanks and Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
George Dunlap
2013-Aug-28 13:39 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Wed, Aug 28, 2013 at 2:08 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On mar, 2013-08-27 at 18:37 -0700, Matt Wilson wrote:
>> On Tue, Aug 27, 2013 at 06:27:15PM -0700, Matt Wilson wrote:
>> > On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:
>> > > Uses subop hypercall to request XEN about vnuma topology.
>> > > Sets the memory blocks (aligned by XEN), cpus, distance table
>> > > on boot. NUMA support should be compiled in kernel.
>> >
>> > Are we *really sure* that we want to go this route for PV vNUMA?
>> > Couldn't we build just enough(tm) of the ACPI tables to express the
>> > NUMA topology when constructing the domain? That's what we do for the
>> > e820 map.
>>
>> Ignore me somewhat, since the e820 information is retrieved via
>> hypercall similar to what you're proposing.
>>
> :-)
>
>> Still, if there's some way that we can reuse existing Linux code
>> rather than bolting on a completely parallel mechanism to set this up
>> under PV I think it'd be better.
>>
> Well, it looks to me that Elena is reusing quite a bit of it, isn't she?
> All she's providing is a new initialization function ( xen_numa_init()
> ), as it is happening already for ACPI NUMA, NUMAQ, and other NUMA
> implementations.
>
> In practice, while ACPI based NUMA code parses the ACPI tables in
> acpi_numa_init(), PV vNUMA parses the information coming from an
> hypercall xen_numa_init(). From that point on, Linux that steps-in and
> do everything else "as usual".
>
> Isn't that enough sharing?

I think the only way to "share" more would be to have Xen do some
crazy ACPI table fake-up scheme, which sounds like kind of a
nightmare; and also completely pointless, since there are already
nice clean interfaces we can just hook into and pass nice clean data
structures straight from Xen.

 -George
Elena Ufimtseva
2013-Aug-28 13:41 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
Hi Konrad Thank you for comments, I will resend the fix in the new patch. Regarding memblocks contrustion they are done taking into account e820 map for this domain. I will place some comments in code as you mentioned. Elena On Tue, Aug 27, 2013 at 10:33 AM, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:> On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote: >> Uses subop hypercall to request XEN about vnuma topology. >> Sets the memory blocks (aligned by XEN), cpus, distance table >> on boot. NUMA support should be compiled in kernel. > > Needs a bit more details (which I am sure you will add in the > next postings). Mostly: > - How E820 and NUMA information mesh > - What the hypercall provides > - What changeset in the Xen provides this hypercall > - >> >> NUMA support should be compiled in kernel. >> >> Memory blocks are aligned by xen. Xen is aware of guest initial >> RAM layout. >> If the memory blocks are incorrect, call for default linux numa >> dummy init. > > Unfortunatly that won''t happen if you boot this under dom0. It will > find on some AMD machines the AMD Northbridge and try to scan that. > And blow up. > > If you look at the git commit that did the ''numa = 0'' you will see > the backstory. > > I think part of enablement of '' numa = 1'' is to wrap it with > > if (xen_initial_domain() && xen_supports_this_hypercall()) > numa = 1; > > in the #2 patch. > >> >> Requires XEN with vNUMA support. >> >> Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git >> latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376 >> >> TODO: >> Change subop from XENMEM to SYSCTL. >> >> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com> >> --- >> arch/x86/include/asm/xen/vnuma-xen.h | 36 +++++++++++ >> arch/x86/mm/numa.c | 8 +++ >> arch/x86/xen/enlighten.c | 115 ++++++++++++++++++++++++++++++++++ >> include/xen/interface/memory.h | 1 + >> 4 files changed, 160 insertions(+) >> create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h >> >> diff --git a/arch/x86/include/asm/xen/vnuma-xen.h b/arch/x86/include/asm/xen/vnuma-xen.h >> new file mode 100644 >> index 0000000..73c1bde >> --- /dev/null >> +++ b/arch/x86/include/asm/xen/vnuma-xen.h >> @@ -0,0 +1,36 @@ >> +#ifndef _ASM_X86_VNUMA_XEN_H >> +#define _ASM_X86_VNUMA_XEN_H >> + >> +#ifdef CONFIG_XEN >> +int __initdata xen_numa_init(void); >> + >> +struct vnuma_memblk { >> + uint64_t start, end; >> +}; >> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memblk); >> + >> +struct vnuma_topology_info { >> + domid_t domid; >> + uint16_t nr_nodes; >> + GUEST_HANDLE(vnuma_memblk) vmemblk; >> + GUEST_HANDLE(int) vdistance; >> + GUEST_HANDLE(int) cpu_to_node; >> + GUEST_HANDLE(int) vnode_to_pnode; > > A comment explaining what those values are meant to have would be good. > Perhaps with a simple example. > >> +}; > > The structure is not very 32-bit friendly. Would it be possible > to add some padding so that the size of this structure is the > same on 32-bit and 64-bit please? > > Perhaps: > domid_t domid; > uint16_t nr_nodes; > uint32_t _pad; > GUEST_HANDLE(vnuma_memblk) vmemblk; > GUEST_HANDLE(int) vdistance; > GUEST_HANDLE(int) cpu_to_node; > GUEST_HANDLE(int) vnode_to_pnode; > > That should make the offsets be the same on both 32 and 64-bit > I think. > >> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info); >> + >> +struct xen_domctl { >> + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ >> + domid_t domain; >> + union { >> + struct vnuma_topology_info vnuma; >> + } u; > > Move that ''u'' one tab back please. 
> >> +}; >> +typedef struct xen_domctl xen_domctl_t; > > Ewww. No typdefs in the Linux code please. > >> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_t); >> + >> +#else >> +int __init xen_numa_init(void) {} >> +#endif >> + >> +#endif /* _ASM_X86_VNUMA_XEN_H */ >> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c >> index 8bf93ba..3ec7855 100644 >> --- a/arch/x86/mm/numa.c >> +++ b/arch/x86/mm/numa.c >> @@ -20,6 +20,10 @@ >> >> #include "numa_internal.h" >> >> +#ifdef CONFIG_XEN >> +#include "asm/xen/vnuma-xen.h" >> +#endif >> + >> int __initdata numa_off; >> nodemask_t numa_nodes_parsed __initdata; >> >> @@ -621,6 +625,10 @@ static int __init dummy_numa_init(void) >> void __init x86_numa_init(void) >> { >> if (!numa_off) { >> +#ifdef CONFIG_XEN >> + if (xen_pv_domain() && !numa_init(xen_numa_init)) > > I would remove the xen_pv_domain() check and add that in enlighten.c code. > >> + return; >> +#endif >> #ifdef CONFIG_X86_NUMAQ >> if (!numa_init(numaq_numa_init)) >> return; >> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c >> index 193097e..4658e9d 100644 >> --- a/arch/x86/xen/enlighten.c >> +++ b/arch/x86/xen/enlighten.c >> @@ -33,6 +33,7 @@ >> #include <linux/memblock.h> >> #include <linux/edd.h> >> >> +#include <asm/numa.h> >> #include <xen/xen.h> >> #include <xen/events.h> >> #include <xen/interface/xen.h> >> @@ -69,6 +70,7 @@ >> #include <asm/mwait.h> >> #include <asm/pci_x86.h> >> #include <asm/pat.h> >> +#include <asm/xen/vnuma-xen.h> >> >> #ifdef CONFIG_ACPI >> #include <linux/acpi.h> >> @@ -1750,4 +1752,117 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { >> .x2apic_available = xen_x2apic_para_available, >> }; >> EXPORT_SYMBOL(x86_hyper_xen_hvm); >> + >> +/* Xen PV NUMA topology initialization */ >> +int __initdata xen_numa_init(void) >> +{ >> + struct vnuma_memblk *vblk; >> + struct vnuma_topology_info numa_topo; >> + uint64_t size, start, end; >> + int i, j, cpu, rc; >> + u64 phys, physd, physc; >> + u64 new_end_pfn; >> + size_t mem_size; >> + >> + int *vdistance, *cpu_to_node; >> + unsigned long dist_size, cpu_to_node_size; > > You need an extra line here; > >> + numa_topo.domid = DOMID_SELF; >> + rc = -EINVAL; > > Both of those can be part of the decleration of variables above. 
Like so: > int rc = -EINVAL; > struct vnuma_topology_info numa_topo = { > .domid = DOMID_SELF; > }; > >> + /* block mem reservation for memblks */ >> + mem_size = num_possible_cpus() * sizeof(struct vnuma_memblk); >> + phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), mem_size, PAGE_SIZE); >> + if (!phys) { >> + pr_info("vNUMA: can''t allocate memory for membloks size %zu\n", mem_size); >> + rc = -ENOMEM; >> + goto errout; >> + } >> + if (memblock_reserve(phys, mem_size) < 0) { >> + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", mem_size); >> + rc = -ENOMEM; >> + goto errout; >> + } >> + vblk = __va(phys); >> + set_xen_guest_handle(numa_topo.vmemblk, vblk); >> + >> + dist_size = num_possible_cpus() * num_possible_cpus() * sizeof(*numa_topo.vdistance); >> + physd = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), dist_size, PAGE_SIZE); >> + if (!physd) { >> + pr_info("vNUMA: can''t allocate memory for distance size %zu\n", dist_size); >> + rc = -ENOMEM; >> + goto errout; >> + } >> + if (memblock_reserve(physd, dist_size) < 0) { >> + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", dist_size); >> + rc = -ENOMEM; >> + goto errout; >> + } >> + vdistance = __va(physd); >> + set_xen_guest_handle(numa_topo.vdistance, (int *)vdistance); >> + >> + /* allocate memblock for vcpu to node mask of max size */ >> + cpu_to_node_size = num_possible_cpus() * sizeof(*numa_topo.cpu_to_node); >> + physc = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), cpu_to_node_size, PAGE_SIZE); >> + if (!physc) { >> + pr_info("vNUMA: can''t allocate memory for distance size %zu\n", cpu_to_node_size); >> + rc = -ENOMEM; >> + goto errout; >> + } >> + if (memblock_reserve(physc, cpu_to_node_size) < 0) { >> + pr_info("vNUMA: Cannot reserver mem for blocks of size %zu\n", cpu_to_node_size); >> + goto errout; >> + } >> + cpu_to_node = __va(physc); >> + set_xen_guest_handle(numa_topo.cpu_to_node, (int *)cpu_to_node); >> + if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0) > > I think you need to do rc = HYPERVISOR... > > and then if you get rc = -ENOSYS (so not implemented) change the rc to zero. > >> + goto errout; >> + if (numa_topo.nr_nodes == 0) >> + /* Pass to DUMMY numa */ > > Also change rc to zero. > >> + goto errout; >> + if (numa_topo.nr_nodes > num_possible_cpus()) { >> + pr_info("vNUMA: Node without cpu is not supported, nodes = %d\n", numa_topo.nr_nodes); >> + goto errout; >> + } >> + new_end_pfn = 0; >> + /* We have sizes in pages received from hypervisor, no holes and ordered */ > > From toolstack you mean. It is the one setting it up. And I hope it sets it up > based on the E820 it has constructed as well? Otherwise it would be a bit > awkward if those two were different. > >> + for (i = 0; i < numa_topo.nr_nodes; i++) { >> + start = vblk[i].start; >> + end = vblk[i].end; >> + size = end - start; >> + pr_info("VNMA blk[%d] size is %#010Lx start = %#010Lx end = %#010Lx\n", >> + i, size, start, end); > > pr_debug perhaps? > >> + if (numa_add_memblk(i, start, end)) { >> + pr_info("vNUMA: Could not add memblk[%d]\n", i); >> + rc = -EAGAIN; > > Can you unroll the NUMA topology if this fails? Or is that not a problem? 
> >> + goto errout; >> + } >> + node_set(i, numa_nodes_parsed); >> + } >> + setup_nr_node_ids(); >> + /* Setting the cpu, apicid to node */ >> + for_each_cpu(cpu, cpu_possible_mask) { >> + pr_info("Setting up vcpu[%d] to node[%d]\n", cpu, cpu_to_node[cpu]); > > pr_debug >> + set_apicid_to_node(cpu, cpu_to_node[cpu]); >> + numa_set_node(cpu, cpu_to_node[cpu]); >> + __apicid_to_node[cpu] = cpu_to_node[cpu]; >> + cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]); >> + } >> + setup_nr_node_ids(); > > How come this is being called twice? It should have a comment > explaining this extra call. > >> + rc = 0; >> + for (i = 0; i < numa_topo.nr_nodes; i++) >> + for (j = 0; j < numa_topo.nr_nodes; j++) { >> + numa_set_distance(i, j, *(vdistance + j*numa_topo.nr_nodes + i)); > > You could simply this by just doing: > > int idx = (j * numa_topo.nr_nodes) + i; > > ... *(vdistance + idx)); > >> + pr_info("distance %d <-> %d = %d\n", i, j, *(vdistance + j*numa_topo.nr_nodes + i)); > > pr_debug > >> + } >> +errout: >> + if (phys) >> + memblock_free(__pa(phys), mem_size); >> + if (physd) >> + memblock_free(__pa(physd), dist_size); >> + if (physc) >> + memblock_free(__pa(physc), cpu_to_node_size); >> + if (rc) >> + pr_debug("XEN: hypercall failed to get vNUMA topology.\n"); > > And include the ''rc'' value in the output please. >> + return rc; >> +} >> + >> #endif >> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h >> index 2ecfe4f..16b8d87 100644 >> --- a/include/xen/interface/memory.h >> +++ b/include/xen/interface/memory.h >> @@ -263,4 +263,5 @@ struct xen_remove_from_physmap { >> }; >> DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); >> >> +#define XENMEM_get_vnuma_info 25 >> #endif /* __XEN_PUBLIC_MEMORY_H__ */ >> -- >> 1.7.10.4 >>-- Elena
Konrad Rzeszutek Wilk
2013-Aug-28 16:01 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Tue, Aug 27, 2013 at 06:37:35PM -0700, Matt Wilson wrote:
> On Tue, Aug 27, 2013 at 06:27:15PM -0700, Matt Wilson wrote:
> > On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:
> > > Uses subop hypercall to request XEN about vnuma topology.
> > > Sets the memory blocks (aligned by XEN), cpus, distance table
> > > on boot. NUMA support should be compiled in kernel.
> >
> > Are we *really sure* that we want to go this route for PV vNUMA?
> > Couldn't we build just enough(tm) of the ACPI tables to express the
> > NUMA topology when constructing the domain? That's what we do for the
> > e820 map.
>
> Ignore me somewhat, since the e820 information is retrieved via
> hypercall similar to what you're proposing.
>
> Still, if there's some way that we can reuse existing Linux code
> rather than bolting on a completely parallel mechanism to set this up
> under PV I think it'd be better.

That would also parallel the work you do with ACPI right?

We could enable ACPI parsing in a PV guest and provide one table - the
SLIT (or SRAT).

But I don't know enough about SRAT to know whether this is something
that represents truly everything we need?

>
> --msw
Matt Wilson
2013-Aug-28 16:38 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Wed, Aug 28, 2013 at 12:01:48PM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Aug 27, 2013 at 06:37:35PM -0700, Matt Wilson wrote:
> > On Tue, Aug 27, 2013 at 06:27:15PM -0700, Matt Wilson wrote:
> > > On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote:
> > > > Uses subop hypercall to request XEN about vnuma topology.
> > > > Sets the memory blocks (aligned by XEN), cpus, distance table
> > > > on boot. NUMA support should be compiled in kernel.
> > >
> > > Are we *really sure* that we want to go this route for PV vNUMA?
> > > Couldn't we build just enough(tm) of the ACPI tables to express the
> > > NUMA topology when constructing the domain? That's what we do for the
> > > e820 map.
> >
> > Ignore me somewhat, since the e820 information is retrieved via
> > hypercall similar to what you're proposing.
> >
> > Still, if there's some way that we can reuse existing Linux code
> > rather than bolting on a completely parallel mechanism to set this up
> > under PV I think it'd be better.
>
> That would also parallel the work you do with ACPI right?

Yes.

> We could enable ACPI parsing in a PV guest and provide one table - the
> SLIT (or SRAT).

Right, it'd be the SRAT table for the resource affinity and a SLIT
table for the locality/distance information.

> But I don't know enough about SRAT to know whether this is something
> that represents truly everything we need?

The SRAT table has processor objects and memory objects. A processor
object maps a logical processor number to its initial APIC ID and
provides the node information. A memory object specifies the start and
length for a memory region and provides the node information.

For SLIT, the entries are a matrix of distances.

Here are the structs:

struct acpi_20_srat_processor {
    uint8_t type;
    uint8_t length;
    uint8_t domain;
    uint8_t apic_id;
    uint32_t flags;
    uint8_t sapic_id;
    uint8_t domain_hi[3];
    uint32_t reserved;
};

struct acpi_20_srat_memory {
    uint8_t type;
    uint8_t length;
    uint8_t domain;
    uint8_t domain_hi[3]; /* this is ACPI 3.0, reserved in 2.0 */
    uint16_t reserved;
    uint32_t base_address_lo;
    uint32_t base_address_hi;
    uint32_t length_lo;
    uint32_t length_hi;
    uint32_t reserved2;
    uint32_t flags;
    uint32_t reserved3[2];
};

struct acpi_20_slit {
    struct acpi_header header;
    uint64_t nr_sys_loc;
    uint8_t entry[];
};

--msw
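For what it's worth, the SLIT body is an N x N byte matrix of relative distances (10 meaning "local" in the ACPI definition), so a consumer would index it roughly as below. This is a sketch based on the ACPI 2.0 layout quoted above, not on anything in this series.

/* Distance from locality "from" to locality "to" in an acpi_20_slit. */
static uint8_t slit_distance(const struct acpi_20_slit *slit,
                             unsigned int from, unsigned int to)
{
    /* entry[] holds nr_sys_loc * nr_sys_loc relative distances. */
    return slit->entry[from * slit->nr_sys_loc + to];
}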
Elena Ufimtseva
2013-Aug-28 20:10 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
Hi Matt Do you construct these tables via hypercalls? Will be good to see the details on how you do it and if I can use it as well. Thank you. Elena On Wed, Aug 28, 2013 at 12:38 PM, Matt Wilson <msw@amazon.com> wrote:> On Wed, Aug 28, 2013 at 12:01:48PM -0400, Konrad Rzeszutek Wilk wrote: >> On Tue, Aug 27, 2013 at 06:37:35PM -0700, Matt Wilson wrote: >> > On Tue, Aug 27, 2013 at 06:27:15PM -0700, Matt Wilson wrote: >> > > On Tue, Aug 27, 2013 at 04:52:59AM -0400, Elena Ufimtseva wrote: >> > > > Uses subop hypercall to request XEN about vnuma topology. >> > > > Sets the memory blocks (aligned by XEN), cpus, distance table >> > > > on boot. NUMA support should be compiled in kernel. >> > > >> > > Are we *really sure* that we want to go this route for PV vNUMA? >> > > Couldn''t we build just enough(tm) of the ACPI tables to express the >> > > NUMA topology when constructing the domain? That''s what we do for the >> > > e820 map. >> > >> > Ignore me somewhat, since the e820 information is retrieved via >> > hypercall similar to what you''re proposing. >> > >> > Still, if there''s some way that we can reuse existing Linux code >> > rather than bolting on a completely parallel mechanism to set this up >> > under PV I think it''d be better. >> >> That would also parallel the work you do with ACPI right? > > Yes. > >> We could enable ACPI parsing in a PV guest and provide one table - the >> SLIT (or SRAT). > > Right, it''d be the SRAT table for the resource affinity and a SLIT > table for the locality/distance information. > >> But I don''t know enough about SRAT to know whether this is something >> that represents truly everything we need? > > The SRAT table has processor objects and memory objects. A processor > object maps a logical processor number to its initial APIC ID and > provides the node information. A memory object specifies the start and > length for a memory region and provides the node information. > > For SLIT, the entries are a matrix of distances. > > Here are the structs: > > struct acpi_20_srat_processor { > uint8_t type; > uint8_t length; > uint8_t domain; > uint8_t apic_id; > uint32_t flags; > uint8_t sapic_id; > uint8_t domain_hi[3]; > uint32_t reserved; > }; > > struct acpi_20_srat_memory { > uint8_t type; > uint8_t length; > uint8_t domain; > uint8_t domain_hi[3]; /* this is ACPI 3.0, reserved in 2.0 */ > uint16_t reserved; > uint32_t base_address_lo; > uint32_t base_address_hi; > uint32_t length_lo; > uint32_t length_hi; > uint32_t reserved2; > uint32_t flags; > uint32_t reserved3[2]; > }; > > struct acpi_20_slit { > struct acpi_header header; > uint64_t nr_sys_loc; > uint8_t entry[]; > }; > > --msw-- Elena
Dario Faggioli
2013-Aug-28 22:21 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On mer, 2013-08-28 at 09:38 -0700, Matt Wilson wrote:
> On Wed, Aug 28, 2013 at 12:01:48PM -0400, Konrad Rzeszutek Wilk wrote:
> > That would also parallel the work you do with ACPI right?
>
> Yes.
>
I see. It's hard to comment, since I have only seen some previous (off
list) versiona of Elena's code (and won't have time to properly review
this one untill Monday), and I haven't seen Matt's code at all...

Anyway...

> > We could enable ACPI parsing in a PV guest and provide one table - the
> > SLIT (or SRAT).
>
> Right, it'd be the SRAT table for the resource affinity and a SLIT
> table for the locality/distance information.
>
... I see the point in sharing code for HVM and PV. However, I'm still
not convinced this would be something valuable to do with this specific
hunk, mostly because it looks really easy and clean to hook up at the
numa_init() stage (i.e., what Elena is doing), that anything else I can
think of looks like more work. :-P

> > But I don't know enough about SRAT to know whether this is something
> > that represents truly everything we need?
>
> The SRAT table has processor objects and memory objects. A processor
> object maps a logical processor number to its initial APIC ID and
> provides the node information. A memory object specifies the start and
> length for a memory region and provides the node information.
>
> For SLIT, the entries are a matrix of distances.
>
> Here are the structs:
>
> [snip]
>
Ok, thanks for the very useful info. What would be interesting to know
is where and how Linux reads the information from ACPI and fill these
structures.

The current Elena's approach is 1 hypercall, during early NUMA
initialization, and that is pretty self-contained (which is the thing I
like most about it).

How easy is it to look up the places where each one of the tables gets
filled, intercept the code/calls doing that, and replace it properly for
our use case? How easy is it to "xen-ify" those call sites (stuff like
'#ifdef CONFIG_XEN' or/and is_xen_domain() )? How many hypercalls would
it require? Is it possible to have one doing all the work, or would we
need something like one for each table?

As I said, I can't check the details about it right now, but it sounds
like more work than Elena's xen_numa_init().

Thanks and Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
Dario Faggioli
2013-Aug-28 23:12 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On mar, 2013-08-27 at 10:33 -0400, Konrad Rzeszutek Wilk wrote:
> Unfortunatly that won't happen if you boot this under dom0. It will
> find on some AMD machines the AMD Northbridge and try to scan that.
> And blow up.
>
Well, perhaps this was not evident enough in Elena's mails and patches,
but her current target is actually DomU. :-P

However, that's only because we're proceeding in steps, so yes, once
DomU will be working, Dom0 will become the target, and we'll have to
start caring about not making things explode! :-O

> If you look at the git commit that did the 'numa = 0' you will see
> the backstory.
>
That's something interesting to look at... I remember having seen that
discussion but not the details. I'll check when back from vacation.

> I think part of enablement of 'numa = 1' is to wrap it with
>
> if (xen_initial_domain() && xen_supports_this_hypercall())
> 	numa = 1;
>
> in the #2 patch.
>
Mmm... Did you mean '!xen_initial_domain()' ? Or is it me that did not
understand your comment about Dom0 above?

In case you did mean '!', again, this would be fine for now, since the
target is DomUs. At the point when we will start chasing Dom0, it would
be interesting to find a solution to this issue... :-/

Anyway, thanks for pointing this out.

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
Matt Wilson
2013-Aug-28 23:25 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Wed, Aug 28, 2013 at 04:10:11PM -0400, Elena Ufimtseva wrote:
> Hi Matt
>
> Do you construct these tables via hypercalls?
> Will be good to see the details on how you do it and if I can use it as well.
> Thank you.

All the ACPI tables get built in guest memory by hvmloader. The
configuration is passed from libxc into hvmloader via the hvm_info
table.

I can't take credit for all of this work. Most of it was written and
proposed by Andre Przywara in 2010.

http://lists.xen.org/archives/html/xen-devel/2010-02/msg00280.html

--msw
Matt Wilson
2013-Aug-29 00:11 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Thu, Aug 29, 2013 at 12:21:40AM +0200, Dario Faggioli wrote:
> On mer, 2013-08-28 at 09:38 -0700, Matt Wilson wrote:
> > On Wed, Aug 28, 2013 at 12:01:48PM -0400, Konrad Rzeszutek Wilk wrote:
> > > That would also parallel the work you do with ACPI right?
> >
> > Yes.
> >
> I see. It's hard to comment, since I have only seen some previous (off
> list) versions of Elena's code (and won't have time to properly review
> this one until Monday), and I haven't seen Matt's code at all...

See the link I gave in my other reply for the older proposed patch I
based this work on. Here's the head of the patchset:

http://lists.xen.org/archives/html/xen-devel/2010-02/msg00279.html

> > > We could enable ACPI parsing in a PV guest and provide one table - the
> > > SLIT (or SRAT).
> >
> > Right, it'd be the SRAT table for the resource affinity and a SLIT
> > table for the locality/distance information.
> >
> ... I see the point in sharing code for HVM and PV. However, I'm still
> not convinced this would be something valuable to do with this specific
> hunk, mostly because it looks really easy and clean to hook up at the
> numa_init() stage (i.e., what Elena is doing), that anything else I can
> think of looks like more work. :-P

I agree.

> > > But I don't know enough about SRAT to know whether this is something
> > > that represents truly everything we need?
> >
> > The SRAT table has processor objects and memory objects. A processor
> > object maps a logical processor number to its initial APIC ID and
> > provides the node information. A memory object specifies the start and
> > length for a memory region and provides the node information.
> >
> > For SLIT, the entries are a matrix of distances.
> >
> > Here are the structs:
> >
> > [snip]
> >
> Ok, thanks for the very useful info. What would be interesting to know
> is where and how Linux reads the information from ACPI and fills these
> structures.
>
> Elena's current approach is one hypercall, during early NUMA
> initialization, and that is pretty self-contained (which is the thing I
> like most about it).
>
> How easy is it to look up the places where each one of the tables gets
> filled, intercept the code/calls doing that, and replace it properly for
> our use case? How easy is it to "xen-ify" those call sites (stuff like
> '#ifdef CONFIG_XEN' or/and is_xen_domain())? How many hypercalls would
> it require? Is it possible to have one doing all the work, or would we
> need something like one for each table?

I think it wouldn't be too hard to construct the static ACPI tables
and provide them in acpi_os_get_root_pointer().

> As I said, I can't check the details about it right now, but it sounds
> like more work than Elena's xen_numa_init().

Yes, it's probably a bit more work.

--msw
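To make the "matrix of distances" remark concrete: SLIT stores an N x N
byte matrix flattened row-major, with 10 conventionally meaning "local".
The following toy, standalone program (not Xen, hvmloader, or kernel
code; the two-node values are made up) just shows how a vNUMA distance
table would map onto such a flat entry[] array.

/*
 * Standalone illustration of SLIT-style distance flattening.
 * entry[i * N + j] holds the distance from node i to node j;
 * 10 = local, 20 = remote are the usual example values.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const unsigned int nr_nodes = 2;
	const uint8_t entry[] = { 10, 20,
	                          20, 10 };

	for (unsigned int i = 0; i < nr_nodes; i++)
		for (unsigned int j = 0; j < nr_nodes; j++)
			printf("distance(node %u -> node %u) = %u\n",
			       i, j, entry[i * nr_nodes + j]);
	return 0;
}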
George Dunlap
2013-Aug-29 09:16 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Thu, Aug 29, 2013 at 12:25 AM, Matt Wilson <msw@amazon.com> wrote:
> On Wed, Aug 28, 2013 at 04:10:11PM -0400, Elena Ufimtseva wrote:
>> Hi Matt
>>
>> Do you construct these tables via hypercalls?
>> Will be good to see the details on how you do it and if I can use it as well.
>> Thank you.
>
> All the ACPI tables get built in guest memory by hvmloader.

Our end goal is to be able to have dom0 boot with NUMA, so whatever we
do will have to be constructed in Xen as well. If making the ACPI
tables is fairly straightforward, that might be an option; but if it's
really complicated, we probably want to stick with what we have.

 -George
David Vrabel
2013-Aug-29 13:41 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On 29/08/13 01:11, Matt Wilson wrote:
>
> I think it wouldn't be too hard to construct the static ACPI tables
> and provide them in acpi_os_get_root_pointer().

Elena's approach looks fine to me from a Linux point of view.

It's still not clear how presenting the information via ACPI would be
an improvement. I also think it is preferable for the hypercall ABI
for PV guests to be independent of architecture specifics where
possible.

If you posted Xen and Linux series producing/using the ACPI SRAT and
SLIT tables for dom0 and domU PV guests we could better evaluate that
option but until then I'm inclined to consider Elena's approach as the
better one.

David
Konrad Rzeszutek Wilk
2013-Aug-29 14:07 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Thu, Aug 29, 2013 at 01:12:15AM +0200, Dario Faggioli wrote:
> On mar, 2013-08-27 at 10:33 -0400, Konrad Rzeszutek Wilk wrote:
> > Unfortunately that won't happen if you boot this under dom0. It will
> > find on some AMD machines the AMD Northbridge and try to scan that.
> > And blow up.
> >
> Well, perhaps this was not evident enough in Elena's mails and patches,
> but her current target is actually DomU. :-P
>
> However, that's only because we're proceeding in steps, so yes, once
> DomU is working, Dom0 will become the target, and we'll have to start
> caring about not making things explode! :-O
>
> > If you look at the git commit that did the 'numa = 0' you will see
> > the backstory.
> >
> That's something interesting to look at... I remember having seen that
> discussion but not the details. I'll check when back from vacation.
>
> > I think part of enablement of 'numa = 1' is to wrap it with
> >
> >	if (xen_initial_domain() && xen_supports_this_hypercall())
> >		numa = 1;
> >
> > in the #2 patch.
> >
> Mmm... Did you mean '!xen_initial_domain()'? Or is it me that did not
> understand your comment about Dom0 above?

For right now '!' would suffice.

> In case you did mean '!', again, this would be fine for now, since the
> target is DomUs. At the point when we start chasing Dom0, it would be
> interesting to find a solution to this issue... :-/
>
> Anyway, thanks for pointing this out.
>
> Regards,
> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>
Konrad Rzeszutek Wilk
2013-Aug-29 14:23 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Thu, Aug 29, 2013 at 02:41:23PM +0100, David Vrabel wrote:
> On 29/08/13 01:11, Matt Wilson wrote:
> >
> > I think it wouldn't be too hard to construct the static ACPI tables
> > and provide them in acpi_os_get_root_pointer().
>
> Elena's approach looks fine to me from a Linux point of view.

From a maintenance perspective the question is:

 - Why not re-use existing code? As in, if both PV and HVM can use
   SRAT, why not do it that way? This way you are exercising the same
   code path in both guests and it means less bugs to chase.

   Incidentally this also means that the mechanism to fetch the
   NUMA information from the hypervisor and construct the SRAT tables
   from can be done the same in both hvmloader and Linux kernel.
   Which is nice - b/c if you find bugs in one, there is a chance they
   are the same in the other. Also this means one hypercall for both
   PV and HVM, which is neat as well.

 - Lastly, the question is - a new 'interface' in the generic code is
   added to deal exclusively with Xen. Why special-case it when one
   can use the SRAT, for example? Is there a compelling technical
   argument? (Is SRAT inferior to vNUMA?)

Of course the counter argument is:

 - It will require more work as now Linux has to use the hypercall to
   construct an ACPI SRAT table. And we need to create a "fake" SRAT
   table and as well an RS.. something structure so that ACPI code can
   find it.

   Or perhaps this is something libxl can do? Which would mean the code
   to construct this and for HVM is both in the Xen code base. And the
   only thing Linux needs to do is turn NUMA on for PV guests if it
   finds an ACPI table (perhaps by scanning the E820?).

 - Will it work? As in, will a PV domU be able to run with just one
   ACPI table - just the SRAT and nothing else? Or will it fall flat
   on its face?

> It's still not clear how presenting the information via ACPI would be an
> improvement. I also think it is preferable for the hypercall ABI for
> PV guests to be independent of architecture specifics where possible.

I concur on the hypercall ABI - but I think the hypercall should be
available on both PV and HVM so that it can be used in both.

> If you posted Xen and Linux series producing/using the ACPI SRAT and
> SLIT tables for dom0 and domU PV guests we could better evaluate that
> option but until then I'm inclined to consider Elena's approach as the
> better one.

I think short term Elena's option is the better one as it gets it going
and we can experiment with it. Long term I think stashing the data in
ACPI SRAT/SLIT is right. But Elena might not get to it in the next
couple of months - which means somebody else will have to sign up for
that.

P.S.
I did for fun enable ACPI support in the PV domU guest and it boots
fine. It does complain about missing tables, but that was it.
George Dunlap
2013-Aug-29 14:32 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On 29/08/13 15:23, Konrad Rzeszutek Wilk wrote:
> On Thu, Aug 29, 2013 at 02:41:23PM +0100, David Vrabel wrote:
>> On 29/08/13 01:11, Matt Wilson wrote:
>>> I think it wouldn't be too hard to construct the static ACPI tables
>>> and provide them in acpi_os_get_root_pointer().
>> Elena's approach looks fine to me from a Linux point of view.
> From a maintenance perspective the question is:
>
> - Why not re-use existing code? As in, if both PV and HVM can use
>   SRAT, why not do it that way? This way you are exercising the same
>   code path in both guests and it means less bugs to chase.
>
>   Incidentally this also means that the mechanism to fetch the
>   NUMA information from the hypervisor and construct the SRAT tables
>   from can be done the same in both hvmloader and Linux kernel.

If the SRAT tables are built by hvmloader for HVM guests, couldn't
hvmloader make this hypercall and construct the tables, instead of
having the domain builder do it? That would mean that most of the
codepath is the same; HVM just has the extra step of encoding and
decoding the information in SRAT form.

>> If you posted Xen and Linux series producing/using the ACPI SRAT and
>> SLIT tables for dom0 and domU PV guests we could better evaluate that
>> option but until then I'm inclined to consider Elena's approach as the
>> better one.
> I think short term Elena's option is the better one as it gets it going
> and we can experiment with it. Long term I think stashing the data in
> ACPI SRAT/SLIT is right. But Elena might not get to it in the next
> couple of months - which means somebody else will have to sign up for
> that.

I definitely think that Elena needs to continue on the same path for
now, so she can actually have closure on the project in a reasonable
amount of time.

 -George
Konrad Rzeszutek Wilk
2013-Aug-29 14:51 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Thu, Aug 29, 2013 at 03:32:13PM +0100, George Dunlap wrote:
> On 29/08/13 15:23, Konrad Rzeszutek Wilk wrote:
> >On Thu, Aug 29, 2013 at 02:41:23PM +0100, David Vrabel wrote:
> >>On 29/08/13 01:11, Matt Wilson wrote:
> >>>I think it wouldn't be too hard to construct the static ACPI tables
> >>>and provide them in acpi_os_get_root_pointer().
> >>Elena's approach looks fine to me from a Linux point of view.
> > From a maintenance perspective the question is:
> >
> > - Why not re-use existing code? As in, if both PV and HVM can use
> >   SRAT, why not do it that way? This way you are exercising the same
> >   code path in both guests and it means less bugs to chase.
> >
> >   Incidentally this also means that the mechanism to fetch the
> >   NUMA information from the hypervisor and construct the SRAT tables
> >   from can be done the same in both hvmloader and Linux kernel.
>
> If the SRAT tables are built by hvmloader for HVM guests, couldn't
> hvmloader make this hypercall and construct the tables, instead of
> having the domain builder do it? That would mean that most of the
> codepath is the same; HVM just has the extra step of encoding and
> decoding the information in SRAT form.

Correct. Or the domain builder can build it for both PV and HVM. We do
have full control of the E820 for PV - so we can make an "E820_ACPI"
region and stash the ACPI tables there. Either way would work.
Whichever one means more code sharing is probably the best bang for
the buck, so to say.

> >>If you posted Xen and Linux series producing/using the ACPI SRAT and
> >>SLIT tables for dom0 and domU PV guests we could better evaluate that
> >>option but until then I'm inclined to consider Elena's approach as the
> >>better one.
> >I think short term Elena's option is the better one as it gets it going
> >and we can experiment with it. Long term I think stashing the data in
> >ACPI SRAT/SLIT is right. But Elena might not get to it in the next
> >couple of months - which means somebody else will have to sign up for
> >that.
>
> I definitely think that Elena needs to continue on the same path for
> now, so she can actually have closure on the project in a reasonable
> amount of time.

I concur. The caveat is that it could mean that the x86 maintainers
might object to the generic path having the special case for Xen and
that particular patch part won't be upstreamed until it has been taken
care of.

If Elena is OK with that possibility - then that is fine with me.
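To illustrate the E820 suggestion above: the struct below mirrors the
Linux/PV e820entry layout, and E820_ACPI (type 3) is the standard "ACPI
reclaimable" type for memory holding ACPI tables. The placement and
size are made up; nothing here comes from posted Xen or Linux code.

/*
 * Standalone illustration only: a region the domain builder might add
 * so a PV guest could discover Xen-built SRAT/SLIT tables by walking
 * its E820 map.  Address and size are purely hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

struct e820entry {
	uint64_t addr;      /* start of the region */
	uint64_t size;      /* length of the region */
	uint32_t type;      /* E820_RAM, E820_RESERVED, E820_ACPI, ... */
} __attribute__((packed));

#define E820_ACPI 3

int main(void)
{
	struct e820entry xen_acpi_region = {
		.addr = 0xfc000000ULL,   /* hypothetical placement */
		.size = 2 * 4096,        /* assumed room for RSDP + SRAT + SLIT */
		.type = E820_ACPI,
	};

	printf("E820: %#llx - %#llx (type %u)\n",
	       (unsigned long long)xen_acpi_region.addr,
	       (unsigned long long)(xen_acpi_region.addr + xen_acpi_region.size),
	       xen_acpi_region.type);
	return 0;
}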
Dario Faggioli
2013-Aug-29 22:29 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On gio, 2013-08-29 at 10:51 -0400, Konrad Rzeszutek Wilk wrote:
> On Thu, Aug 29, 2013 at 03:32:13PM +0100, George Dunlap wrote:
> > On 29/08/13 15:23, Konrad Rzeszutek Wilk wrote:
> > > - Why not re-use existing code? As in, if both PV and HVM can use
> > >   SRAT, why not do it that way? This way you are exercising the same
> > >   code path in both guests and it means less bugs to chase.
> > >
> > >   Incidentally this also means that the mechanism to fetch the
> > >   NUMA information from the hypervisor and construct the SRAT tables
> > >   from can be done the same in both hvmloader and Linux kernel.
> >
> > If the SRAT tables are built by hvmloader for HVM guests, couldn't
> > hvmloader make this hypercall and construct the tables, instead of
> > having the domain builder do it? That would mean that most of the
> > codepath is the same; HVM just has the extra step of encoding and
> > decoding the information in SRAT form.
>
> Correct.
>
Indeed. Also, Matt mentioned the HVM implementation does most of the
work in libxc and hvmloader, is that the case? (Matt, I knew about the
"old" HVM-NUMA series from Andre, but I don't have the details fresh
enough right now... Will take another look ASAP.)

If yes, please consider that, when talking about PV-vNUMA, although
right now the series addresses DomU only, we plan to make it possible
for Dom0 to have a virtual NUMA topology too. In that case, I don't
think any code from libxc and hvmloader could be shared, could it?

So, to me, it sort of looks like we risk introducing more code
duplication than what we're trying to avoid! :-P

> > >I think short term Elena's option is the better one as it gets it going
> > >and we can experiment with it. Long term I think stashing the data in
> > >ACPI SRAT/SLIT is right. But Elena might not get to it in the next
> > >couple of months - which means somebody else will have to sign up for
> > >that.
> >
> > I definitely think that Elena needs to continue on the same path for
> > now, so she can actually have closure on the project in a reasonable
> > amount of time.
>
> I concur. The caveat is that it could mean that the x86 maintainers might
> object to the generic path having the special case for Xen and that
> particular patch part won't be upstreamed until it has been taken care of.
>
> If Elena is OK with that possibility - then that is fine with me.
>
Sorry, I'm not sure I understand what you mean here. When talking about
PV-NUMA, with Elena's approach, we have a Xen special case in
numa_init(). With the fake table approach, we'd have a Xen special case
in the ACPI parsing code (Matt was talking about something like
acpi_os_get_root_pointer()), wouldn't we?

So, either way, we'll have a Xen special case; the only difference I see
is that, in the latter, we'd probably have to deal with the ACPI
maintainers instead of with the x86 ones. :-)

FWIW, x86_numa_init() already looks like this:

void __init x86_numa_init(void)
{
	if (!numa_off) {
#ifdef CONFIG_X86_NUMAQ
		if (!numa_init(numaq_numa_init))
			return;
#endif
#ifdef CONFIG_ACPI_NUMA
		if (!numa_init(x86_acpi_numa_init))
			return;
#endif
#ifdef CONFIG_AMD_NUMA
		if (!numa_init(amd_numa_init))
			return;
#endif
	}

	numa_init(dummy_numa_init);
}

I.e., quite a bit of architecture/implementation special casing already!
So, perhaps, having some more `#ifdef CONFIG_XEN' and/or `if
(xen_domain())' in there won't be that much upsetting after all. :-)

Thanks and Regards,
Dario

--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
Konrad Rzeszutek Wilk
2013-Aug-30 12:57 UTC
Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
On Fri, Aug 30, 2013 at 12:29:09AM +0200, Dario Faggioli wrote:
> On gio, 2013-08-29 at 10:51 -0400, Konrad Rzeszutek Wilk wrote:
> > On Thu, Aug 29, 2013 at 03:32:13PM +0100, George Dunlap wrote:
> > > On 29/08/13 15:23, Konrad Rzeszutek Wilk wrote:
> > > > - Why not re-use existing code? As in, if both PV and HVM can use
> > > >   SRAT, why not do it that way? This way you are exercising the same
> > > >   code path in both guests and it means less bugs to chase.
> > > >
> > > >   Incidentally this also means that the mechanism to fetch the
> > > >   NUMA information from the hypervisor and construct the SRAT tables
> > > >   from can be done the same in both hvmloader and Linux kernel.
> > >
> > > If the SRAT tables are built by hvmloader for HVM guests, couldn't
> > > hvmloader make this hypercall and construct the tables, instead of
> > > having the domain builder do it? That would mean that most of the
> > > codepath is the same; HVM just has the extra step of encoding and
> > > decoding the information in SRAT form.
> >
> > Correct.
> >
> Indeed. Also, Matt mentioned the HVM implementation does most of the
> work in libxc and hvmloader, is that the case? (Matt, I knew about the
> "old" HVM-NUMA series from Andre, but I don't have the details fresh
> enough right now... Will take another look ASAP.)
>
> If yes, please consider that, when talking about PV-vNUMA, although
> right now the series addresses DomU only, we plan to make it possible
> for Dom0 to have a virtual NUMA topology too. In that case, I don't
> think any code from libxc and hvmloader could be shared, could it?

But it could be stuck in the hypervisor at that point. So all of that
would reside within the Xen source base - in three places. Yuck.

> So, to me, it sort of looks like we risk introducing more code
> duplication than what we're trying to avoid! :-P

Would be nice if this could be somehow stuck in a library that all
three (hvmloader, libxc and hypervisor) could be built with.

> > > >I think short term Elena's option is the better one as it gets it going
> > > >and we can experiment with it. Long term I think stashing the data in
> > > >ACPI SRAT/SLIT is right. But Elena might not get to it in the next
> > > >couple of months - which means somebody else will have to sign up for
> > > >that.
> > >
> > > I definitely think that Elena needs to continue on the same path for
> > > now, so she can actually have closure on the project in a reasonable
> > > amount of time.
> >
> > I concur. The caveat is that it could mean that the x86 maintainers might
> > object to the generic path having the special case for Xen and that
> > particular patch part won't be upstreamed until it has been taken care of.
> >
> > If Elena is OK with that possibility - then that is fine with me.
> >
> Sorry, I'm not sure I understand what you mean here. When talking about
> PV-NUMA, with Elena's approach, we have a Xen special case in
> numa_init(). With the fake table approach, we'd have a Xen special case
> in the ACPI parsing code (Matt was talking about something like
> acpi_os_get_root_pointer()), wouldn't we?
>
> So, either way, we'll have a Xen special case; the only difference I see
> is that, in the latter, we'd probably have to deal with the ACPI
> maintainers instead of with the x86 ones. :-)

Correct.

> FWIW, x86_numa_init() already looks like this:
>
> void __init x86_numa_init(void)
> {
> 	if (!numa_off) {
> #ifdef CONFIG_X86_NUMAQ
> 		if (!numa_init(numaq_numa_init))
> 			return;
> #endif
> #ifdef CONFIG_ACPI_NUMA
> 		if (!numa_init(x86_acpi_numa_init))
> 			return;
> #endif
> #ifdef CONFIG_AMD_NUMA
> 		if (!numa_init(amd_numa_init))
> 			return;
> #endif
> 	}
>
> 	numa_init(dummy_numa_init);
> }
>
> I.e., quite a bit of architecture/implementation special casing already!
> So, perhaps, having some more `#ifdef CONFIG_XEN' and/or `if
> (xen_domain())' in there won't be that much upsetting after all. :-)

Or perhaps fix that special casing and have something similar to the
iommu_detect API. And make that 'iommu_detect' be more generic now.

> Thanks and Regards,
> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>
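For what it's worth, a rough sketch of the shape such an
iommu_detect-like cleanup could take. This is not a proposed patch:
the struct, the *_detect() predicates and the table are invented for
illustration; only numa_init(), numa_off, dummy_numa_init(),
x86_acpi_numa_init(), amd_numa_init() and the xen_numa_init() from this
series are names that actually appear in the discussion.

/*
 * Sketch only: table-driven replacement for the #ifdef chain in
 * x86_numa_init().  Each backend supplies a predicate saying whether it
 * applies on this boot and an init function that fills the NUMA
 * memblock/distance information; the first backend that both applies
 * and succeeds wins, otherwise we fall back to dummy_numa_init().
 */
struct numa_detect {
	const char *name;
	bool (*applicable)(void);   /* is this backend usable on this boot? */
	int (*init)(void);          /* fills memblocks/distances on success */
};

static struct numa_detect numa_detectors[] __initdata = {
#ifdef CONFIG_XEN
	{ "xen",  xen_numa_detect,  xen_numa_init      },  /* detect() invented */
#endif
#ifdef CONFIG_ACPI_NUMA
	{ "acpi", acpi_numa_detect, x86_acpi_numa_init },  /* detect() invented */
#endif
#ifdef CONFIG_AMD_NUMA
	{ "amd",  amd_numa_detect,  amd_numa_init      },  /* detect() invented */
#endif
};

void __init x86_numa_init(void)
{
	unsigned int i;

	if (!numa_off) {
		for (i = 0; i < ARRAY_SIZE(numa_detectors); i++) {
			if (numa_detectors[i].applicable() &&
			    !numa_init(numa_detectors[i].init))
				return;
		}
	}

	numa_init(dummy_numa_init);
}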