Ryan Harper
2007-Apr-10 01:09 UTC
[Xen-devel] [RFC][PATCH 1/2] export NUMA topology from xen
For post-3.0.5 inclusion: This patch modifies the physinfo hcall to export
NUMA CPU and Memory topology information. The new physinfo hcall is
integrated into libxc and xend (xm info specifically). Included in this
patch is a minor tweak to xm-test's xm info testcase.

The new fields in xm info are:

nr_nodes               : 4
mem_chunks             : node0:0x0000000000000000-0x0000000190000000
                         node1:0x0000000190000000-0x0000000300000000
                         node2:0x0000000300000000-0x0000000470000000
                         node3:0x0000000470000000-0x0000000640000000
node_to_cpu            : node0:0-7
                         node1:8-15
                         node2:16-23
                         node3:24-31

I've also reworked the physinfo call to contain an array of cpu_to_node
elements rather than node_to_cpu, to support machines larger than 64-way.
I convert the array back to node_to_cpu for brevity in the xm info display.

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

diffstat output:
 b/xen/include/public/numa_structs.h                 |   27 ++++++
 tools/libxc/xc_misc.c                               |    4
 tools/libxc/xenctrl.h                               |    3
 tools/python/xen/lowlevel/xc/xc.c                   |   81 +++++++++++++++++---
 tools/python/xen/xend/XendNode.py                   |   67 ++++++++++++++++
 tools/xenmon/xenbaked.c                             |    3
 tools/xenstat/libxenstat/src/xenstat.c              |    3
 tools/xentrace/xentrace.c                           |    3
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4
 xen/arch/x86/sysctl.c                               |   47 +++++++++++
 xen/include/asm-x86/numa.h                          |    7 -
 xen/include/public/arch-x86/xen.h                   |    1
 xen/include/public/sysctl.h                         |    3
 13 files changed, 232 insertions(+), 21 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
This patch modifies the physinfo hcall to export NUMA CPU and Memory
topology information. The new physinfo hcall is integrated into libxc
and xend (xm info specifically). Included in this patch is a minor
tweak to xm-test's xm info testcase.

The new fields in xm info are:

nr_nodes               : 4
mem_chunks             : node0:0x0000000000000000-0x0000000190000000
                         node1:0x0000000190000000-0x0000000300000000
                         node2:0x0000000300000000-0x0000000470000000
                         node3:0x0000000470000000-0x0000000640000000
node_to_cpu            : node0:0-7
                         node1:8-15
                         node2:16-23
                         node3:24-31

I've also reworked the physinfo call to contain an array of cpu_to_node
elements rather than node_to_cpu, to support machines larger than 64-way.
I convert the array back to node_to_cpu for brevity in the xm info display.
Signed-off-by: Ryan Harper <ryanh@us.ibm.com> diff -r 8f9ca49175ce tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/libxc/xc_misc.c Mon Apr 02 16:33:10 2007 -0500 @@ -59,6 +59,10 @@ int xc_physinfo(int xc_handle, DECLARE_SYSCTL; sysctl.cmd = XEN_SYSCTL_physinfo; + + /* set pointers to caller''s so memcpy doesn''t clobber them */ + sysctl.u.physinfo.memory_chunks = put_info->memory_chunks; + sysctl.u.physinfo.cpu_to_node = put_info->cpu_to_node; if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 ) return ret; diff -r 8f9ca49175ce tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/libxc/xenctrl.h Mon Apr 02 16:33:10 2007 -0500 @@ -28,6 +28,7 @@ #include <xen/memory.h> #include <xen/acm.h> #include <xen/acm_ops.h> +#include <xen/numa_structs.h> #ifdef __ia64__ #define XC_PAGE_SHIFT 14 @@ -473,6 +474,8 @@ int xc_send_debug_keys(int xc_handle, ch int xc_send_debug_keys(int xc_handle, char *keys); typedef xen_sysctl_physinfo_t xc_physinfo_t; +typedef node_data_t xc_memory_chunk_t; +typedef uint32_t xc_cpu_to_node_t; int xc_physinfo(int xc_handle, xc_physinfo_t *info); diff -r 8f9ca49175ce tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/python/xen/lowlevel/xc/xc.c Mon Apr 02 16:33:10 2007 -0500 @@ -644,10 +644,27 @@ static PyObject *pyxc_physinfo(XcObject { xc_physinfo_t info; char cpu_cap[128], *p=cpu_cap, *q=cpu_cap; - int i; + int i,j, nr_cpus; + PyObject *ret_obj, *memchunk_obj, *node_to_cpu_obj; + xc_memory_chunk_t *chunks; + xc_cpu_to_node_t *map; + + /* make space for mem chunks */ + chunks = (xc_memory_chunk_t *)malloc( sizeof(xc_memory_chunk_t) * + PUBLIC_MAXCHUNKS ); + set_xen_guest_handle(info.memory_chunks, chunks); + + /* make space for cpu_to_node mapping */ + map = (xc_cpu_to_node_t *)malloc( sizeof(xc_cpu_to_node_t) * + PUBLIC_MAX_CPUS ); + set_xen_guest_handle(info.cpu_to_node, map); if ( xc_physinfo(self->xc_handle, &info) != 0 ) return pyxc_error_to_exception(); + + /* calc number of cpus, ignore nr_nodes since sockets_per node is bogus */ + nr_cpus = info.threads_per_core * info.cores_per_socket * + info.sockets_per_node; *q=0; for(i=0;i<sizeof(info.hw_cap)/4;i++) @@ -659,16 +676,58 @@ static PyObject *pyxc_physinfo(XcObject if(q>cpu_cap) *(q-1)=0; - return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", - "threads_per_core", info.threads_per_core, - "cores_per_socket", info.cores_per_socket, - "sockets_per_node", info.sockets_per_node, - "nr_nodes", info.nr_nodes, - "total_memory", pages_to_kib(info.total_pages), - "free_memory", pages_to_kib(info.free_pages), - "scrub_memory", pages_to_kib(info.scrub_pages), - "cpu_khz", info.cpu_khz, - "hw_caps", cpu_cap); + ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", + "threads_per_core", info.threads_per_core, + "cores_per_socket", info.cores_per_socket, + "sockets_per_node", info.sockets_per_node, + "total_memory", pages_to_kib(info.total_pages), + "free_memory", pages_to_kib(info.free_pages), + "scrub_memory", pages_to_kib(info.scrub_pages), + "cpu_khz", info.cpu_khz, + "hw_caps", cpu_cap); + /* memchunks */ + memchunk_obj = PyList_New(0); + + /* build list of each memchunk''s attributes, converting pfn to paddr */ + for ( i=0; i<info.nr_nodes; i++ ) + { + PyList_Append(memchunk_obj, + Py_BuildValue("{s:i,s:K,s:K}", + "node" , chunks[i].node_id, + "start_paddr", chunks[i].node_start_pfn << XC_PAGE_SHIFT, + "end_paddr" , (chunks[i].node_start_pfn + + 
chunks[i].node_spanned_pages) << XC_PAGE_SHIFT )); + } + PyDict_SetItemString(ret_obj, "mem_chunks", memchunk_obj); + + /* node to cpu mappings */ + node_to_cpu_obj = PyList_New(0); + + /* make a list for each node */ + for ( i=0; i<info.nr_nodes; i++) + { + PyObject *cpus = PyList_New(0); + + /* walk the cpu_to_node array, for each cpu + which maps to node i, add to cpus list */ + for ( j=0; j<nr_cpus; j++) + { + /* this cpu j maps to node i */ + if ( i == (uint32_t)map[j]) + PyList_Append(cpus, PyInt_FromLong(j)); + } + PyList_Append(node_to_cpu_obj, cpus); + } + /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */ + PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); + PyDict_SetItemString(ret_obj, "nr_nodes", + Py_BuildValue("i", info.nr_nodes)); + + /* free malloc''d memory */ + free(chunks); + free(map); + + return ret_obj; } static PyObject *pyxc_xeninfo(XcObject *self) diff -r 8f9ca49175ce tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/python/xen/xend/XendNode.py Mon Apr 02 11:48:17 2007 -0500 @@ -534,6 +534,69 @@ class XendNode: [''version'', ver], [''machine'', mch]] + def list_to_rangepairs(self,cmap): + cmap.sort() + pairs = [] + x = y = 0 + for i in range(0,len(cmap)): + try: + if ((cmap[y+1] - cmap[i]) > 1): + pairs.append((cmap[x],cmap[y])) + x = y = i+1 + else: + y = y + 1 + # if we go off the end, then just add x to y + except IndexError: + pairs.append((cmap[x],cmap[y])) + + return pairs + + def format_pairs(self,pairs): + if not pairs: + return "no cpus" + out = "" + for f,s in pairs: + if (f==s): + out += ''%d''%f + else: + out += ''%d-%d''%(f,s) + out += '','' + # trim trailing '','' + return out[:-1] + + def list_to_strrange(self,list): + return self.format_pairs(self.list_to_rangepairs(list)) + + def format_memchunks(self, pinfo): + str='''' + whitespace='''' + try: + chunk=pinfo[''mem_chunks''] + for i in range(0, pinfo[''nr_nodes'']): + str+=''%snode%d:0x%016x-0x%016x\n'' % (whitespace, + chunk[i][''node''], + chunk[i][''start_paddr''], + chunk[i][''end_paddr'']) + whitespace=''%25s'' % '''' + except: + str=''none\n'' + return str[:-1] + + def format_node_to_cpu(self, pinfo): + str='''' + whitespace='''' + try: + node_to_cpu=pinfo[''node_to_cpu''] + for i in range(0, pinfo[''nr_nodes'']): + str+=''%snode%d:%s\n'' % (whitespace, + i, + self.list_to_strrange(node_to_cpu[i])) + whitespace=''%25s'' % '''' + except: + str=''none\n'' + return str[:-1]; + + def physinfo(self): info = self.xc.physinfo() @@ -546,6 +609,8 @@ class XendNode: # physinfo is in KiB, need it in MiB info[''total_memory''] = info[''total_memory''] / 1024 info[''free_memory''] = info[''free_memory''] / 1024 + info[''mem_chunks''] = self.format_memchunks(info) + info[''node_to_cpu''] = self.format_node_to_cpu(info) ITEM_ORDER = [''nr_cpus'', ''nr_nodes'', @@ -556,6 +621,8 @@ class XendNode: ''hw_caps'', ''total_memory'', ''free_memory'', + ''mem_chunks'', + ''node_to_cpu'' ] return [[k, info[k]] for k in ITEM_ORDER] diff -r 8f9ca49175ce tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/xenmon/xenbaked.c Mon Apr 02 16:33:10 2007 -0500 @@ -448,6 +448,9 @@ unsigned int get_num_cpus(void) int xc_handle = xc_interface_open(); int ret; + /* ensure memory_chunks and node_to_cpu are NULL */ + memset(&physinfo, 0, sizeof(physinfo)); + ret = xc_physinfo(xc_handle, &physinfo); if ( ret != 0 ) diff -r 8f9ca49175ce tools/xenstat/libxenstat/src/xenstat.c --- 
a/tools/xenstat/libxenstat/src/xenstat.c Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/xenstat/libxenstat/src/xenstat.c Mon Apr 02 16:33:10 2007 -0500 @@ -147,6 +147,9 @@ xenstat_node *xenstat_get_node(xenstat_h /* Store the handle in the node for later access */ node->handle = handle; + + /* ensure memory_chunks and node_to_cpu are NULL */ + memset(&physinfo, 0, sizeof(physinfo)); /* Get information about the physical system */ if (xc_physinfo(handle->xc_handle, &physinfo) < 0) { diff -r 8f9ca49175ce tools/xentrace/xentrace.c --- a/tools/xentrace/xentrace.c Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/xentrace/xentrace.c Mon Apr 02 16:33:10 2007 -0500 @@ -260,6 +260,9 @@ unsigned int get_num_cpus(void) int xc_handle = xc_interface_open(); int ret; + /* ensure memory_chunks and node_to_cpu are NULL */ + memset(&physinfo, 0, sizeof(physinfo)); + ret = xc_physinfo(xc_handle, &physinfo); if ( ret != 0 ) diff -r 8f9ca49175ce tools/xm-test/tests/info/02_info_compiledata_pos.py --- a/tools/xm-test/tests/info/02_info_compiledata_pos.py Sat Mar 31 19:02:09 2007 +0100 +++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py Mon Apr 02 11:48:17 2007 -0500 @@ -18,9 +18,7 @@ for line in lines: for line in lines: pieces = line.split(" : ", 1) - if len(pieces) < 2: - FAIL("Found invalid line: [%s]" % line) - else: + if len(pieces) > 1: map[pieces[0]] = pieces[1] for field in ["cores_per_socket", "threads_per_core", "cpu_mhz", diff -r 8f9ca49175ce xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Sat Mar 31 19:02:09 2007 +0100 +++ b/xen/arch/x86/sysctl.c Mon Apr 02 16:33:10 2007 -0500 @@ -23,6 +23,10 @@ #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/processor.h> +#include <asm/numa.h> +#include <xen/nodemask.h> + +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) long arch_do_sysctl( struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) @@ -34,6 +38,10 @@ long arch_do_sysctl( case XEN_SYSCTL_physinfo: { + int i; + node_data_t *chunks; + uint32_t *map, cpu_to_node_map[NR_CPUS]; + xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; pi->threads_per_core @@ -43,7 +51,6 @@ long arch_do_sysctl( pi->sockets_per_node = num_online_cpus() / cpus_weight(cpu_core_map[0]); - pi->nr_nodes = 1; pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->scrub_pages = avail_scrub_pages(); @@ -51,6 +58,44 @@ long arch_do_sysctl( memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4); ret = 0; + + /* fetch memory_chunk pointer from guest*/ + get_xen_guest_handle(chunks, sysctl->u.physinfo.memory_chunks); + + /* if it is set, fill out memory chunk array */ + if ( chunks != NULL ) + for_each_online_node(i) + { + /* copy memory chunk structs to guest */ + if ( copy_to_guest_offset(sysctl->u.physinfo.memory_chunks, i, + &(node_data[i]), 1) ) { + ret = -EFAULT; + break; + } + } + + /* set number of notes */ + pi->nr_nodes = num_online_nodes(); + + /* fetch cpu_to_node pointer from guest */ + get_xen_guest_handle(map, sysctl->u.physinfo.cpu_to_node); + + /* if set, fill out cpu_to_node array */ + if ( map != NULL ) + { + /* copy cpu to node mapping to domU */ + memset(cpu_to_node_map, 0, sizeof(cpu_to_node_map)); + for ( i = 0; i < num_online_cpus(); i++) + { + cpu_to_node_map[i]=cpu_to_node(i); + if ( copy_to_guest_offset(sysctl->u.physinfo.cpu_to_node, + i, &(cpu_to_node_map[i]), 1) ) { + ret = -EFAULT; + break; + } + } + } + if ( copy_to_guest(u_sysctl, sysctl, 1) ) ret = -EFAULT; } diff -r 8f9ca49175ce 
xen/include/asm-x86/numa.h --- a/xen/include/asm-x86/numa.h Sat Mar 31 19:02:09 2007 +0100 +++ b/xen/include/asm-x86/numa.h Mon Apr 02 11:48:17 2007 -0500 @@ -2,6 +2,7 @@ #define _ASM_X8664_NUMA_H 1 #include <xen/cpumask.h> +#include <public/numa_structs.h> #define NODES_SHIFT 6 @@ -44,12 +45,6 @@ extern int memnode_shift; extern int memnode_shift; extern u8 memnodemap[NODEMAPSIZE]; -struct node_data { - unsigned long node_start_pfn; - unsigned long node_spanned_pages; - unsigned int node_id; -}; - extern struct node_data node_data[]; static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) diff -r 8f9ca49175ce xen/include/public/arch-x86/xen.h --- a/xen/include/public/arch-x86/xen.h Sat Mar 31 19:02:09 2007 +0100 +++ b/xen/include/public/arch-x86/xen.h Mon Apr 02 11:48:17 2007 -0500 @@ -54,6 +54,7 @@ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigne __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t); DEFINE_XEN_GUEST_HANDLE(char); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); diff -r 8f9ca49175ce xen/include/public/numa_structs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/public/numa_structs.h Mon Apr 02 11:48:17 2007 -0500 @@ -0,0 +1,27 @@ +/* + * Ryan Grimm <grimm@us.ibm.com> + * Ryan Harper <ryanh@us.ibm.com> + * Copyright 2006, 2007 International Business Machines Corporation. + * + */ + +#ifndef __XEN_PUBLIC_NUMA_STRUCTS_H__ + +#define __XEN_PUBLIC_NUMA_STRUCTS_H__ + +#include "xen.h" + +/* define these for xc to use b/c MAX_NUMNODES and MAX_CHUNKS + * are not exposed in /public */ +#define PUBLIC_MAX_NUMNODES 16 +#define PUBLIC_MAXCHUNKS 32 +#define PUBLIC_MAX_CPUS 256 + +typedef struct node_data { + unsigned long node_start_pfn; + unsigned long node_spanned_pages; + unsigned int node_id; +} node_data_t; +DEFINE_XEN_GUEST_HANDLE(node_data_t); + +#endif diff -r 8f9ca49175ce xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Sat Mar 31 19:02:09 2007 +0100 +++ b/xen/include/public/sysctl.h Mon Apr 02 16:33:10 2007 -0500 @@ -33,6 +33,7 @@ #include "xen.h" #include "domctl.h" +#include "numa_structs.h" #define XEN_SYSCTL_INTERFACE_VERSION 0x00000003 @@ -85,6 +86,8 @@ struct xen_sysctl_physinfo { uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; + XEN_GUEST_HANDLE(node_data_t) memory_chunks; + XEN_GUEST_HANDLE(uint32_t) cpu_to_node; }; typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
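[Editorial note: the following is a minimal, illustrative sketch of how a C tools
caller could consume the reworked physinfo interface, mirroring what pyxc_physinfo()
in the patch above does. It is not part of the patch; it assumes the typedefs
(xc_memory_chunk_t, xc_cpu_to_node_t) and the PUBLIC_MAXCHUNKS / PUBLIC_MAX_CPUS
limits that this patch adds to xenctrl.h and numa_structs.h.]

#include <stdio.h>
#include <stdlib.h>
#include <xenctrl.h>

int print_numa_topology(int xc_handle)
{
    xc_physinfo_t info;
    xc_memory_chunk_t *chunks;
    xc_cpu_to_node_t *map;
    int i, nr_cpus;

    /* buffers sized by the PUBLIC_* limits introduced in numa_structs.h */
    chunks = malloc(sizeof(*chunks) * PUBLIC_MAXCHUNKS);
    map    = malloc(sizeof(*map) * PUBLIC_MAX_CPUS);
    if ( chunks == NULL || map == NULL )
        return -1;

    /* hand the buffers to Xen; if left NULL, the hypervisor skips them */
    set_xen_guest_handle(info.memory_chunks, chunks);
    set_xen_guest_handle(info.cpu_to_node, map);

    if ( xc_physinfo(xc_handle, &info) != 0 )
    {
        free(chunks);
        free(map);
        return -1;
    }

    /* same cpu count calculation as pyxc_physinfo() in this version */
    nr_cpus = info.threads_per_core * info.cores_per_socket *
              info.sockets_per_node;

    for ( i = 0; i < (int)info.nr_nodes; i++ )
        printf("node%u: pfn 0x%lx + 0x%lx pages\n",
               chunks[i].node_id, chunks[i].node_start_pfn,
               chunks[i].node_spanned_pages);

    for ( i = 0; i < nr_cpus; i++ )
        printf("cpu%d -> node%u\n", i, (unsigned)map[i]);

    free(chunks);
    free(map);
    return 0;
}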
Keir Fraser
2007-Apr-10 09:12 UTC
Re: [Xen-devel] [RFC][PATCH 1/2] export NUMA topology from xen
On 10/4/07 02:09, "Ryan Harper" <ryanh@us.ibm.com> wrote:

> nr_nodes               : 4
> mem_chunks             : node0:0x0000000000000000-0x0000000190000000
>                          node1:0x0000000190000000-0x0000000300000000
>                          node2:0x0000000300000000-0x0000000470000000
>                          node3:0x0000000470000000-0x0000000640000000
> node_to_cpu            : node0:0-7
>                          node1:8-15
>                          node2:16-23
>                          node3:24-31
>
> I've also reworked the physinfo call to contain an array of
> cpu_to_node elements rather than node_to_cpu to support machines larger
> than 64-way. I convert the array back to node_to_cpu for brevity in
> xm info display.

The same would make sense for memory regions (i.e., have a list of
memory regions and include a node identifier for each one, rather than
mapping node-id to memory-region), as this would make it easy to have
multiple memory regions per node. But actually I'm not convinced that
allowing dom0 to read out the physical addresses of memory regions is at
all useful -- why would anyone care which particular physical address
ranges belong to a particular node? The hypercall to find the amount of
free memory per node seems more useful, and probably sufficient by itself.

 -- Keir

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
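[Editorial note: one way to picture the alternative Keir describes -- a flat list
of regions, each tagged with the node it belongs to -- is the hypothetical layout
below. The struct and field names are invented purely for illustration and do not
exist in any Xen public header.]

#include <stdint.h>
#include <stdio.h>

/* a node may own any number of regions in this scheme */
struct memory_region {
    uint64_t start_pfn;   /* first page frame of the region */
    uint64_t nr_pages;    /* length of the region in pages  */
    uint32_t node_id;     /* owning NUMA node               */
};

int main(void)
{
    /* e.g. node 1 owning two discontiguous regions */
    struct memory_region regions[] = {
        { 0x000000, 0x190000, 0 },
        { 0x190000, 0x170000, 1 },
        { 0x300000, 0x050000, 1 },
    };
    unsigned i;

    for ( i = 0; i < sizeof(regions) / sizeof(regions[0]); i++ )
        printf("node%u: pfn 0x%llx + 0x%llx pages\n", regions[i].node_id,
               (unsigned long long)regions[i].start_pfn,
               (unsigned long long)regions[i].nr_pages);
    return 0;
}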
Ryan Harper
2007-Apr-24 15:22 UTC
Re: [Xen-devel] [RFC][PATCH 1/2] export NUMA topology from xen
* Keir Fraser <keir@xensource.com> [2007-04-10 04:13]:
> On 10/4/07 02:09, "Ryan Harper" <ryanh@us.ibm.com> wrote:
>
> > nr_nodes               : 4
> > mem_chunks             : node0:0x0000000000000000-0x0000000190000000
> >                          node1:0x0000000190000000-0x0000000300000000
> >                          node2:0x0000000300000000-0x0000000470000000
> >                          node3:0x0000000470000000-0x0000000640000000
> > node_to_cpu            : node0:0-7
> >                          node1:8-15
> >                          node2:16-23
> >                          node3:24-31
> >
> > I've also reworked the physinfo call to contain an array of
> > cpu_to_node elements rather than node_to_cpu to support machines larger
> > than 64-way. I convert the array back to node_to_cpu for brevity in
> > xm info display.
>
> The same would make sense for memory regions (i.e., have a list of
> memory regions and include a node identifier for each one, rather than
> mapping node-id to memory-region), as this would make it easy to have
> multiple memory regions per node. But actually I'm not convinced that
> allowing dom0 to read out the physical addresses of memory regions is
> at all useful -- why would anyone care which particular physical
> address ranges belong to a particular node? The hypercall to find the
> amount of free memory per node seems more useful, and probably
> sufficient by itself.

Updated.

- Dropped mem_chunks (removed that from the existing ia64 NUMA physinfo)
- Fixed up ia64 cpu_to_node_map array size (was MAX_NUMNODES, now NR_CPUS)
- Fixed sockets_per_node calculation (was bogus on Opteron systems)
- Updated all arches' physinfo call to use num_online_nodes() and the new
  sockets_per_node calculation

Untested on ia64, ppc.

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

diffstat output:
 tools/libxc/xc_misc.c                               |    3
 tools/libxc/xenctrl.h                               |    1
 tools/python/xen/lowlevel/xc/xc.c                   |   61 ++++++++++++++++----
 tools/python/xen/xend/XendNode.py                   |   50 ++++++++++++++++
 tools/xenmon/xenbaked.c                             |    3
 tools/xenstat/libxenstat/src/xenstat.c              |    3
 tools/xentrace/xentrace.c                           |    3
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4 -
 xen/arch/ia64/xen/dom0_ops.c                        |   46 +--------------
 xen/arch/powerpc/sysctl.c                           |    6 -
 xen/arch/x86/sysctl.c                               |   33 +++++++++-
 xen/include/public/sysctl.h                         |    1
 12 files changed, 152 insertions(+), 62 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
This patch modifies the physinfo hcall to export NUMA cpu_to_node
topology information. The new physinfo hcall is integrated into libxc
and xend (xm info specifically). Included in this patch is a minor
tweak to xm-test's xm info testcase. I've also fixed the
sockets_per_node calculation.

The new fields in xm info are:

nr_cpus                : 32
nr_nodes               : 4
sockets_per_node       : 4
cores_per_socket       : 1
threads_per_core       : 2
...
node_to_cpu            : node0:0-7
                         node1:8-15
                         node2:16-23
                         node3:24-31

I've also reworked the physinfo call to contain an array of cpu_to_node
elements rather than node_to_cpu to support machines larger than 64-way.
I convert the array back to node_to_cpu for brevity in the xm info display.
Signed-off-by: Ryan Harper <ryanh@us.ibm.com> diff -r 400a3dca237e tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/libxc/xc_misc.c Fri Apr 13 13:04:24 2007 -0500 @@ -59,6 +59,9 @@ int xc_physinfo(int xc_handle, DECLARE_SYSCTL; sysctl.cmd = XEN_SYSCTL_physinfo; + + /* set pointers to caller''s so memcpy doesn''t clobber them */ + sysctl.u.physinfo.cpu_to_node = put_info->cpu_to_node; if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 ) return ret; diff -r 400a3dca237e tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/libxc/xenctrl.h Fri Apr 13 13:04:24 2007 -0500 @@ -473,6 +473,7 @@ int xc_send_debug_keys(int xc_handle, ch int xc_send_debug_keys(int xc_handle, char *keys); typedef xen_sysctl_physinfo_t xc_physinfo_t; +typedef uint32_t xc_cpu_to_node_t; int xc_physinfo(int xc_handle, xc_physinfo_t *info); diff -r 400a3dca237e tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Apr 13 15:41:39 2007 -0500 @@ -640,14 +640,26 @@ static PyObject *pyxc_pages_to_kib(XcObj } +#define MAX_NR_CPUS 256 static PyObject *pyxc_physinfo(XcObject *self) { xc_physinfo_t info; char cpu_cap[128], *p=cpu_cap, *q=cpu_cap; - int i; + int i,j, nr_cpus; + PyObject *ret_obj, *node_to_cpu_obj; + xc_cpu_to_node_t *map; + + /* make space for cpu_to_node mapping, up to MAX_NR_CPUS cpus */ + map = (xc_cpu_to_node_t *)malloc( sizeof(xc_cpu_to_node_t) * MAX_NR_CPUS); + + set_xen_guest_handle(info.cpu_to_node, map); if ( xc_physinfo(self->xc_handle, &info) != 0 ) return pyxc_error_to_exception(); + + /* calc number of cpus */ + nr_cpus = info.threads_per_core * info.cores_per_socket * + info.sockets_per_node * info.nr_nodes; *q=0; for(i=0;i<sizeof(info.hw_cap)/4;i++) @@ -659,16 +671,43 @@ static PyObject *pyxc_physinfo(XcObject if(q>cpu_cap) *(q-1)=0; - return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", - "threads_per_core", info.threads_per_core, - "cores_per_socket", info.cores_per_socket, - "sockets_per_node", info.sockets_per_node, - "nr_nodes", info.nr_nodes, - "total_memory", pages_to_kib(info.total_pages), - "free_memory", pages_to_kib(info.free_pages), - "scrub_memory", pages_to_kib(info.scrub_pages), - "cpu_khz", info.cpu_khz, - "hw_caps", cpu_cap); + ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", + "threads_per_core", info.threads_per_core, + "cores_per_socket", info.cores_per_socket, + "sockets_per_node", info.sockets_per_node, + "total_memory", pages_to_kib(info.total_pages), + "free_memory", pages_to_kib(info.free_pages), + "scrub_memory", pages_to_kib(info.scrub_pages), + "cpu_khz", info.cpu_khz, + "hw_caps", cpu_cap); + + /* node to cpu mappings */ + node_to_cpu_obj = PyList_New(0); + + /* make a list for each node */ + for ( i=0; i<info.nr_nodes; i++) + { + PyObject *cpus = PyList_New(0); + + /* walk the cpu_to_node array, for each cpu + which maps to node i, add to cpus list */ + for ( j=0; j<nr_cpus; j++) + { + /* this cpu j maps to node i */ + if ( i == (uint32_t)map[j]) + PyList_Append(cpus, PyInt_FromLong(j)); + } + PyList_Append(node_to_cpu_obj, cpus); + } + /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */ + PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); + PyDict_SetItemString(ret_obj, "nr_nodes", + Py_BuildValue("i", info.nr_nodes)); + + /* free malloc''d memory */ + free(map); + + return ret_obj; } static PyObject *pyxc_xeninfo(XcObject *self) diff -r 
400a3dca237e tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/python/xen/xend/XendNode.py Fri Apr 13 13:04:24 2007 -0500 @@ -547,6 +547,54 @@ class XendNode: [''version'', ver], [''machine'', mch]] + def list_to_rangepairs(self,cmap): + cmap.sort() + pairs = [] + x = y = 0 + for i in range(0,len(cmap)): + try: + if ((cmap[y+1] - cmap[i]) > 1): + pairs.append((cmap[x],cmap[y])) + x = y = i+1 + else: + y = y + 1 + # if we go off the end, then just add x to y + except IndexError: + pairs.append((cmap[x],cmap[y])) + + return pairs + + def format_pairs(self,pairs): + if not pairs: + return "no cpus" + out = "" + for f,s in pairs: + if (f==s): + out += ''%d''%f + else: + out += ''%d-%d''%(f,s) + out += '','' + # trim trailing '','' + return out[:-1] + + def list_to_strrange(self,list): + return self.format_pairs(self.list_to_rangepairs(list)) + + def format_node_to_cpu(self, pinfo): + str='''' + whitespace='''' + try: + node_to_cpu=pinfo[''node_to_cpu''] + for i in range(0, pinfo[''nr_nodes'']): + str+=''%snode%d:%s\n'' % (whitespace, + i, + self.list_to_strrange(node_to_cpu[i])) + whitespace=''%25s'' % '''' + except: + str=''none\n'' + return str[:-1]; + + def physinfo(self): info = self.xc.physinfo() @@ -559,6 +607,7 @@ class XendNode: # physinfo is in KiB, need it in MiB info[''total_memory''] = info[''total_memory''] / 1024 info[''free_memory''] = info[''free_memory''] / 1024 + info[''node_to_cpu''] = self.format_node_to_cpu(info) ITEM_ORDER = [''nr_cpus'', ''nr_nodes'', @@ -569,6 +618,7 @@ class XendNode: ''hw_caps'', ''total_memory'', ''free_memory'', + ''node_to_cpu'' ] return [[k, info[k]] for k in ITEM_ORDER] diff -r 400a3dca237e tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xenmon/xenbaked.c Fri Apr 13 13:04:24 2007 -0500 @@ -448,6 +448,9 @@ unsigned int get_num_cpus(void) int xc_handle = xc_interface_open(); int ret; + /* ensure node_to_cpu is NULL */ + memset(&physinfo, 0, sizeof(physinfo)); + ret = xc_physinfo(xc_handle, &physinfo); if ( ret != 0 ) diff -r 400a3dca237e tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xenstat/libxenstat/src/xenstat.c Fri Apr 13 13:04:24 2007 -0500 @@ -147,6 +147,9 @@ xenstat_node *xenstat_get_node(xenstat_h /* Store the handle in the node for later access */ node->handle = handle; + + /* ensure node_to_cpu is NULL */ + memset(&physinfo, 0, sizeof(physinfo)); /* Get information about the physical system */ if (xc_physinfo(handle->xc_handle, &physinfo) < 0) { diff -r 400a3dca237e tools/xentrace/xentrace.c --- a/tools/xentrace/xentrace.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xentrace/xentrace.c Fri Apr 13 13:04:24 2007 -0500 @@ -260,6 +260,9 @@ unsigned int get_num_cpus(void) int xc_handle = xc_interface_open(); int ret; + /* ensure node_to_cpu is NULL */ + memset(&physinfo, 0, sizeof(physinfo)); + ret = xc_physinfo(xc_handle, &physinfo); if ( ret != 0 ) diff -r 400a3dca237e tools/xm-test/tests/info/02_info_compiledata_pos.py --- a/tools/xm-test/tests/info/02_info_compiledata_pos.py Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py Fri Apr 13 13:04:24 2007 -0500 @@ -18,9 +18,7 @@ for line in lines: for line in lines: pieces = line.split(" : ", 1) - if len(pieces) < 2: - FAIL("Found invalid line: [%s]" % line) - else: + if len(pieces) > 1: map[pieces[0]] = pieces[1] for field in ["cores_per_socket", 
"threads_per_core", "cpu_mhz", diff -r 400a3dca237e xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/arch/ia64/xen/dom0_ops.c Fri Apr 13 13:20:38 2007 -0500 @@ -239,8 +239,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE { #ifdef IA64_NUMA_PHYSINFO int i; - node_data_t *chunks; - u64 *map, cpu_to_node_map[MAX_NUMNODES]; + uint32_t *map, cpu_to_node_map[NR_CPUS]; #endif xen_sysctl_physinfo_t *pi = &op->u.physinfo; @@ -249,11 +248,9 @@ long arch_do_sysctl(xen_sysctl_t *op, XE cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket cpus_weight(cpu_core_map[0]) / pi->threads_per_core; - pi->sockets_per_node = - num_online_cpus() / cpus_weight(cpu_core_map[0]); -#ifndef IA64_NUMA_PHYSINFO - pi->nr_nodes = 1; -#endif + pi->nr_nodes = num_online_nodes(); + pi->sockets_per_node = num_online_cpus() / + (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core); pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->scrub_pages = avail_scrub_pages(); @@ -263,41 +260,6 @@ long arch_do_sysctl(xen_sysctl_t *op, XE ret = 0; #ifdef IA64_NUMA_PHYSINFO - /* fetch memory_chunk pointer from guest */ - get_xen_guest_handle(chunks, pi->memory_chunks); - - printk("chunks=%p, num_node_memblks=%u\n", chunks, num_node_memblks); - /* if it is set, fill out memory chunk array */ - if (chunks != NULL) { - if (num_node_memblks == 0) { - /* Non-NUMA machine. Put pseudo-values. */ - node_data_t data; - data.node_start_pfn = 0; - data.node_spanned_pages = total_pages; - data.node_id = 0; - /* copy memory chunk structs to guest */ - if (copy_to_guest_offset(pi->memory_chunks, 0, &data, 1)) { - ret = -EFAULT; - break; - } - } else { - for (i = 0; i < num_node_memblks && i < PUBLIC_MAXCHUNKS; i++) { - node_data_t data; - data.node_start_pfn = node_memblk[i].start_paddr >> - PAGE_SHIFT; - data.node_spanned_pages = node_memblk[i].size >> PAGE_SHIFT; - data.node_id = node_memblk[i].nid; - /* copy memory chunk structs to guest */ - if (copy_to_guest_offset(pi->memory_chunks, i, &data, 1)) { - ret = -EFAULT; - break; - } - } - } - } - /* set number of notes */ - pi->nr_nodes = num_online_nodes(); - /* fetch cpu_to_node pointer from guest */ get_xen_guest_handle(map, pi->cpu_to_node); diff -r 400a3dca237e xen/arch/powerpc/sysctl.c --- a/xen/arch/powerpc/sysctl.c Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/arch/powerpc/sysctl.c Fri Apr 13 13:09:31 2007 -0500 @@ -45,10 +45,10 @@ long arch_do_sysctl(struct xen_sysctl *s cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket cpus_weight(cpu_core_map[0]) / pi->threads_per_core; - pi->sockets_per_node = - num_online_cpus() / cpus_weight(cpu_core_map[0]); + pi->sockets_per_node = num_online_cpus() / + (num_online_nodes() * pi->cores_per_socket * pi->threads_per_core); - pi->nr_nodes = 1; + pi->nr_nodes = num_online_nodes(); pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = cpu_khz; diff -r 400a3dca237e xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/arch/x86/sysctl.c Fri Apr 13 13:11:15 2007 -0500 @@ -23,6 +23,10 @@ #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/processor.h> +#include <asm/numa.h> +#include <xen/nodemask.h> + +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) long arch_do_sysctl( struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) @@ -34,16 +38,19 @@ long arch_do_sysctl( case XEN_SYSCTL_physinfo: { + int i; + uint32_t *map, cpu_to_node_map[NR_CPUS]; + 
xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; pi->threads_per_core cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket cpus_weight(cpu_core_map[0]) / pi->threads_per_core; - pi->sockets_per_node = - num_online_cpus() / cpus_weight(cpu_core_map[0]); + pi->nr_nodes = num_online_nodes(); + pi->sockets_per_node = num_online_cpus() / + (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core); - pi->nr_nodes = 1; pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->scrub_pages = avail_scrub_pages(); @@ -51,6 +58,26 @@ long arch_do_sysctl( memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4); ret = 0; + + /* fetch cpu_to_node pointer from guest */ + get_xen_guest_handle(map, sysctl->u.physinfo.cpu_to_node); + + /* if set, fill out cpu_to_node array */ + if ( map != NULL ) + { + /* for each cpu, mark in which node the cpu belongs */ + memset(cpu_to_node_map, 0, sizeof(cpu_to_node_map)); + for ( i = 0; i < num_online_cpus(); i++) + { + cpu_to_node_map[i]=cpu_to_node(i); + if ( copy_to_guest_offset(sysctl->u.physinfo.cpu_to_node, + i, &(cpu_to_node_map[i]), 1) ) { + ret = -EFAULT; + break; + } + } + } + if ( copy_to_guest(u_sysctl, sysctl, 1) ) ret = -EFAULT; } diff -r 400a3dca237e xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/include/public/sysctl.h Fri Apr 13 13:04:24 2007 -0500 @@ -85,6 +85,7 @@ struct xen_sysctl_physinfo { uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; + XEN_GUEST_HANDLE(uint32_t) cpu_to_node; }; typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
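[Editorial note: the node_to_cpu view that xm info prints is just an inversion of
the flat cpu_to_node[] array returned by the hypercall. Below is a rough,
self-contained C equivalent of what the Python binding does; it is illustrative
only, and in the real code the array and counts come from xc_physinfo() as in
the patch above.]

#include <stdio.h>
#include <stdint.h>

/* rebuild per-node cpu lists from the flat cpu -> node mapping */
static void print_node_to_cpu(const uint32_t *cpu_to_node,
                              int nr_cpus, int nr_nodes)
{
    int node, cpu;

    for ( node = 0; node < nr_nodes; node++ )
    {
        printf("node%d:", node);
        for ( cpu = 0; cpu < nr_cpus; cpu++ )
            if ( (int)cpu_to_node[cpu] == node )   /* cpu belongs to node */
                printf(" %d", cpu);
        printf("\n");
    }
}

int main(void)
{
    /* 8 cpus spread over 2 nodes, standing in for real hypercall data */
    uint32_t cpu_to_node[] = { 0, 0, 0, 0, 1, 1, 1, 1 };

    print_node_to_cpu(cpu_to_node, 8, 2);
    return 0;
}

[xm info additionally collapses consecutive CPUs into ranges such as 0-7; see
list_to_rangepairs() and format_pairs() in XendNode.py.]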
Ryan Harper
2007-Jun-06 16:07 UTC
Re: [Xen-devel] [RFC][PATCH 1/2] export NUMA topology from xen
* Ryan Harper <ryanh@us.ibm.com> [2007-04-24 10:30]:
> * Keir Fraser <keir@xensource.com> [2007-04-10 04:13]:
> > On 10/4/07 02:09, "Ryan Harper" <ryanh@us.ibm.com> wrote:
> >
> > > nr_nodes               : 4
> > > mem_chunks             : node0:0x0000000000000000-0x0000000190000000
> > >                          node1:0x0000000190000000-0x0000000300000000
> > >                          node2:0x0000000300000000-0x0000000470000000
> > >                          node3:0x0000000470000000-0x0000000640000000
> > > node_to_cpu            : node0:0-7
> > >                          node1:8-15
> > >                          node2:16-23
> > >                          node3:24-31
> > >
> > > I've also reworked the physinfo call to contain an array of
> > > cpu_to_node elements rather than node_to_cpu to support machines
> > > larger than 64-way. I convert the array back to node_to_cpu for
> > > brevity in xm info display.
> >
> > The same would make sense for memory regions (i.e., have a list of
> > memory regions and include a node identifier for each one, rather
> > than mapping node-id to memory-region), as this would make it easy
> > to have multiple memory regions per node. But actually I'm not
> > convinced that allowing dom0 to read out the physical addresses of
> > memory regions is at all useful -- why would anyone care which
> > particular physical address ranges belong to a particular node? The
> > hypercall to find the amount of free memory per node seems more
> > useful, and probably sufficient by itself.
>
> Updated.
>
> - Dropped mem_chunks (removed that from the existing ia64 NUMA physinfo)
> - Fixed up ia64 cpu_to_node_map array size (was MAX_NUMNODES, now
>   NR_CPUS)
> - Fixed sockets_per_node calculation (was bogus on Opteron systems)
> - Updated all arches' physinfo call to use num_online_nodes() and the
>   new sockets_per_node calculation
>
> Untested on ia64, ppc.

Refreshed to changeset: 15200:bd3d6b4c52ec

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

diffstat output:
 tools/libxc/xc_misc.c                               |    3
 tools/libxc/xenctrl.h                               |    1
 tools/python/xen/lowlevel/xc/xc.c                   |   61 ++++++++++++++++----
 tools/python/xen/xend/XendNode.py                   |   50 ++++++++++++++++
 tools/xenmon/xenbaked.c                             |    3
 tools/xenstat/libxenstat/src/xenstat.c              |    3
 tools/xentrace/xentrace.c                           |    3
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4 -
 xen/arch/ia64/xen/dom0_ops.c                        |   46 +--------------
 xen/arch/powerpc/sysctl.c                           |    6 -
 xen/arch/x86/sysctl.c                               |   33 +++++++++-
 xen/include/public/sysctl.h                         |    1
 12 files changed, 152 insertions(+), 62 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
This patch modifies the physinfo hcall to export NUMA cpu_to_node
topology information. The new physinfo hcall is integrated into libxc
and xend (xm info specifically). Included in this patch is a minor
tweak to xm-test's xm info testcase. I've also fixed the
sockets_per_node calculation.

The new fields in xm info are:

nr_cpus                : 32
nr_nodes               : 4
sockets_per_node       : 4
cores_per_socket       : 1
threads_per_core       : 2
...
node_to_cpu            : node0:0-7
                         node1:8-15
                         node2:16-23
                         node3:24-31

I've also reworked the physinfo call to contain an array of cpu_to_node
elements rather than node_to_cpu to support machines larger than 64-way.
I convert the array back to node_to_cpu for brevity in the xm info display.
Signed-off-by: Ryan Harper <ryanh@us.ibm.com> diff -r 400a3dca237e tools/libxc/xc_misc.c --- a/tools/libxc/xc_misc.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/libxc/xc_misc.c Fri Apr 13 13:04:24 2007 -0500 @@ -59,6 +59,9 @@ int xc_physinfo(int xc_handle, DECLARE_SYSCTL; sysctl.cmd = XEN_SYSCTL_physinfo; + + /* set pointers to caller''s so memcpy doesn''t clobber them */ + sysctl.u.physinfo.cpu_to_node = put_info->cpu_to_node; if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 ) return ret; diff -r 400a3dca237e tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/libxc/xenctrl.h Fri Apr 13 13:04:24 2007 -0500 @@ -473,6 +473,7 @@ int xc_send_debug_keys(int xc_handle, ch int xc_send_debug_keys(int xc_handle, char *keys); typedef xen_sysctl_physinfo_t xc_physinfo_t; +typedef uint32_t xc_cpu_to_node_t; int xc_physinfo(int xc_handle, xc_physinfo_t *info); diff -r 400a3dca237e tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Apr 13 15:41:39 2007 -0500 @@ -640,14 +640,26 @@ static PyObject *pyxc_pages_to_kib(XcObj } +#define MAX_NR_CPUS 256 static PyObject *pyxc_physinfo(XcObject *self) { xc_physinfo_t info; char cpu_cap[128], *p=cpu_cap, *q=cpu_cap; - int i; + int i,j, nr_cpus; + PyObject *ret_obj, *node_to_cpu_obj; + xc_cpu_to_node_t *map; + + /* make space for cpu_to_node mapping, up to MAX_NR_CPUS cpus */ + map = (xc_cpu_to_node_t *)malloc( sizeof(xc_cpu_to_node_t) * MAX_NR_CPUS); + + set_xen_guest_handle(info.cpu_to_node, map); if ( xc_physinfo(self->xc_handle, &info) != 0 ) return pyxc_error_to_exception(); + + /* calc number of cpus */ + nr_cpus = info.threads_per_core * info.cores_per_socket * + info.sockets_per_node * info.nr_nodes; *q=0; for(i=0;i<sizeof(info.hw_cap)/4;i++) @@ -659,16 +671,43 @@ static PyObject *pyxc_physinfo(XcObject if(q>cpu_cap) *(q-1)=0; - return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", - "threads_per_core", info.threads_per_core, - "cores_per_socket", info.cores_per_socket, - "sockets_per_node", info.sockets_per_node, - "nr_nodes", info.nr_nodes, - "total_memory", pages_to_kib(info.total_pages), - "free_memory", pages_to_kib(info.free_pages), - "scrub_memory", pages_to_kib(info.scrub_pages), - "cpu_khz", info.cpu_khz, - "hw_caps", cpu_cap); + ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", + "threads_per_core", info.threads_per_core, + "cores_per_socket", info.cores_per_socket, + "sockets_per_node", info.sockets_per_node, + "total_memory", pages_to_kib(info.total_pages), + "free_memory", pages_to_kib(info.free_pages), + "scrub_memory", pages_to_kib(info.scrub_pages), + "cpu_khz", info.cpu_khz, + "hw_caps", cpu_cap); + + /* node to cpu mappings */ + node_to_cpu_obj = PyList_New(0); + + /* make a list for each node */ + for ( i=0; i<info.nr_nodes; i++) + { + PyObject *cpus = PyList_New(0); + + /* walk the cpu_to_node array, for each cpu + which maps to node i, add to cpus list */ + for ( j=0; j<nr_cpus; j++) + { + /* this cpu j maps to node i */ + if ( i == (uint32_t)map[j]) + PyList_Append(cpus, PyInt_FromLong(j)); + } + PyList_Append(node_to_cpu_obj, cpus); + } + /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */ + PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj); + PyDict_SetItemString(ret_obj, "nr_nodes", + Py_BuildValue("i", info.nr_nodes)); + + /* free malloc''d memory */ + free(map); + + return ret_obj; } static PyObject *pyxc_xeninfo(XcObject *self) diff -r 
400a3dca237e tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/python/xen/xend/XendNode.py Fri Apr 13 13:04:24 2007 -0500 @@ -547,6 +547,54 @@ class XendNode: [''version'', ver], [''machine'', mch]] + def list_to_rangepairs(self,cmap): + cmap.sort() + pairs = [] + x = y = 0 + for i in range(0,len(cmap)): + try: + if ((cmap[y+1] - cmap[i]) > 1): + pairs.append((cmap[x],cmap[y])) + x = y = i+1 + else: + y = y + 1 + # if we go off the end, then just add x to y + except IndexError: + pairs.append((cmap[x],cmap[y])) + + return pairs + + def format_pairs(self,pairs): + if not pairs: + return "no cpus" + out = "" + for f,s in pairs: + if (f==s): + out += ''%d''%f + else: + out += ''%d-%d''%(f,s) + out += '','' + # trim trailing '','' + return out[:-1] + + def list_to_strrange(self,list): + return self.format_pairs(self.list_to_rangepairs(list)) + + def format_node_to_cpu(self, pinfo): + str='''' + whitespace='''' + try: + node_to_cpu=pinfo[''node_to_cpu''] + for i in range(0, pinfo[''nr_nodes'']): + str+=''%snode%d:%s\n'' % (whitespace, + i, + self.list_to_strrange(node_to_cpu[i])) + whitespace=''%25s'' % '''' + except: + str=''none\n'' + return str[:-1]; + + def physinfo(self): info = self.xc.physinfo() @@ -559,6 +607,7 @@ class XendNode: # physinfo is in KiB, need it in MiB info[''total_memory''] = info[''total_memory''] / 1024 info[''free_memory''] = info[''free_memory''] / 1024 + info[''node_to_cpu''] = self.format_node_to_cpu(info) ITEM_ORDER = [''nr_cpus'', ''nr_nodes'', @@ -569,6 +618,7 @@ class XendNode: ''hw_caps'', ''total_memory'', ''free_memory'', + ''node_to_cpu'' ] return [[k, info[k]] for k in ITEM_ORDER] diff -r 400a3dca237e tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xenmon/xenbaked.c Fri Apr 13 13:04:24 2007 -0500 @@ -448,6 +448,9 @@ unsigned int get_num_cpus(void) int xc_handle = xc_interface_open(); int ret; + /* ensure node_to_cpu is NULL */ + memset(&physinfo, 0, sizeof(physinfo)); + ret = xc_physinfo(xc_handle, &physinfo); if ( ret != 0 ) diff -r 400a3dca237e tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xenstat/libxenstat/src/xenstat.c Fri Apr 13 13:04:24 2007 -0500 @@ -147,6 +147,9 @@ xenstat_node *xenstat_get_node(xenstat_h /* Store the handle in the node for later access */ node->handle = handle; + + /* ensure node_to_cpu is NULL */ + memset(&physinfo, 0, sizeof(physinfo)); /* Get information about the physical system */ if (xc_physinfo(handle->xc_handle, &physinfo) < 0) { diff -r 400a3dca237e tools/xentrace/xentrace.c --- a/tools/xentrace/xentrace.c Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xentrace/xentrace.c Fri Apr 13 13:04:24 2007 -0500 @@ -260,6 +260,9 @@ unsigned int get_num_cpus(void) int xc_handle = xc_interface_open(); int ret; + /* ensure node_to_cpu is NULL */ + memset(&physinfo, 0, sizeof(physinfo)); + ret = xc_physinfo(xc_handle, &physinfo); if ( ret != 0 ) diff -r 400a3dca237e tools/xm-test/tests/info/02_info_compiledata_pos.py --- a/tools/xm-test/tests/info/02_info_compiledata_pos.py Mon Apr 09 12:05:26 2007 +0100 +++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py Fri Apr 13 13:04:24 2007 -0500 @@ -18,9 +18,7 @@ for line in lines: for line in lines: pieces = line.split(" : ", 1) - if len(pieces) < 2: - FAIL("Found invalid line: [%s]" % line) - else: + if len(pieces) > 1: map[pieces[0]] = pieces[1] for field in ["cores_per_socket", 
"threads_per_core", "cpu_mhz", diff -r 400a3dca237e xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/arch/ia64/xen/dom0_ops.c Fri Apr 13 13:20:38 2007 -0500 @@ -239,8 +239,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE { #ifdef IA64_NUMA_PHYSINFO int i; - node_data_t *chunks; - u64 *map, cpu_to_node_map[MAX_NUMNODES]; + uint32_t *map, cpu_to_node_map[NR_CPUS]; #endif xen_sysctl_physinfo_t *pi = &op->u.physinfo; @@ -249,11 +248,9 @@ long arch_do_sysctl(xen_sysctl_t *op, XE cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket cpus_weight(cpu_core_map[0]) / pi->threads_per_core; - pi->sockets_per_node = - num_online_cpus() / cpus_weight(cpu_core_map[0]); -#ifndef IA64_NUMA_PHYSINFO - pi->nr_nodes = 1; -#endif + pi->nr_nodes = num_online_nodes(); + pi->sockets_per_node = num_online_cpus() / + (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core); pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->scrub_pages = avail_scrub_pages(); @@ -263,41 +260,6 @@ long arch_do_sysctl(xen_sysctl_t *op, XE ret = 0; #ifdef IA64_NUMA_PHYSINFO - /* fetch memory_chunk pointer from guest */ - get_xen_guest_handle(chunks, pi->memory_chunks); - - printk("chunks=%p, num_node_memblks=%u\n", chunks, num_node_memblks); - /* if it is set, fill out memory chunk array */ - if (chunks != NULL) { - if (num_node_memblks == 0) { - /* Non-NUMA machine. Put pseudo-values. */ - node_data_t data; - data.node_start_pfn = 0; - data.node_spanned_pages = total_pages; - data.node_id = 0; - /* copy memory chunk structs to guest */ - if (copy_to_guest_offset(pi->memory_chunks, 0, &data, 1)) { - ret = -EFAULT; - break; - } - } else { - for (i = 0; i < num_node_memblks && i < PUBLIC_MAXCHUNKS; i++) { - node_data_t data; - data.node_start_pfn = node_memblk[i].start_paddr >> - PAGE_SHIFT; - data.node_spanned_pages = node_memblk[i].size >> PAGE_SHIFT; - data.node_id = node_memblk[i].nid; - /* copy memory chunk structs to guest */ - if (copy_to_guest_offset(pi->memory_chunks, i, &data, 1)) { - ret = -EFAULT; - break; - } - } - } - } - /* set number of notes */ - pi->nr_nodes = num_online_nodes(); - /* fetch cpu_to_node pointer from guest */ get_xen_guest_handle(map, pi->cpu_to_node); diff -r 400a3dca237e xen/arch/powerpc/sysctl.c --- a/xen/arch/powerpc/sysctl.c Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/arch/powerpc/sysctl.c Fri Apr 13 13:09:31 2007 -0500 @@ -45,10 +45,10 @@ long arch_do_sysctl(struct xen_sysctl *s cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket cpus_weight(cpu_core_map[0]) / pi->threads_per_core; - pi->sockets_per_node = - num_online_cpus() / cpus_weight(cpu_core_map[0]); + pi->sockets_per_node = num_online_cpus() / + (num_online_nodes() * pi->cores_per_socket * pi->threads_per_core); - pi->nr_nodes = 1; + pi->nr_nodes = num_online_nodes(); pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = cpu_khz; diff -r 400a3dca237e xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/arch/x86/sysctl.c Fri Apr 13 13:11:15 2007 -0500 @@ -23,6 +23,10 @@ #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/processor.h> +#include <asm/numa.h> +#include <xen/nodemask.h> + +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) long arch_do_sysctl( struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) @@ -34,16 +38,19 @@ long arch_do_sysctl( case XEN_SYSCTL_physinfo: { + int i; + uint32_t *map, cpu_to_node_map[NR_CPUS]; + 
xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; pi->threads_per_core cpus_weight(cpu_sibling_map[0]); pi->cores_per_socket cpus_weight(cpu_core_map[0]) / pi->threads_per_core; - pi->sockets_per_node = - num_online_cpus() / cpus_weight(cpu_core_map[0]); + pi->nr_nodes = num_online_nodes(); + pi->sockets_per_node = num_online_cpus() / + (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core); - pi->nr_nodes = 1; pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->scrub_pages = avail_scrub_pages(); @@ -51,6 +58,26 @@ long arch_do_sysctl( memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4); ret = 0; + + /* fetch cpu_to_node pointer from guest */ + get_xen_guest_handle(map, sysctl->u.physinfo.cpu_to_node); + + /* if set, fill out cpu_to_node array */ + if ( map != NULL ) + { + /* for each cpu, mark in which node the cpu belongs */ + memset(cpu_to_node_map, 0, sizeof(cpu_to_node_map)); + for ( i = 0; i < num_online_cpus(); i++) + { + cpu_to_node_map[i]=cpu_to_node(i); + if ( copy_to_guest_offset(sysctl->u.physinfo.cpu_to_node, + i, &(cpu_to_node_map[i]), 1) ) { + ret = -EFAULT; + break; + } + } + } + if ( copy_to_guest(u_sysctl, sysctl, 1) ) ret = -EFAULT; } diff -r 400a3dca237e xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Mon Apr 09 12:05:26 2007 +0100 +++ b/xen/include/public/sysctl.h Fri Apr 13 13:04:24 2007 -0500 @@ -85,6 +85,7 @@ struct xen_sysctl_physinfo { uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; + XEN_GUEST_HANDLE(uint32_t) cpu_to_node; }; typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
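[Editorial note: as a sanity check on the corrected sockets_per_node formula in
the patch, plugging in the numbers from the example xm info output above (the
assumed 32-way box Ryan describes) gives the expected result. The parameters
below are taken from that example, not measured.]

#include <stdio.h>

int main(void)
{
    int num_online_cpus  = 32;  /* nr_cpus                        */
    int nr_nodes         = 4;   /* num_online_nodes() in the code */
    int cores_per_socket = 1;
    int threads_per_core = 2;

    /* sockets_per_node = online cpus / (nodes * cores * threads) */
    int sockets_per_node = num_online_cpus /
        (nr_nodes * cores_per_socket * threads_per_core);

    printf("sockets_per_node = %d\n", sockets_per_node);   /* prints 4 */
    return 0;
}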