Jiang, Yunhong
2009-Dec-25 10:42 UTC
[Xen-devel] [PATCH] Correct handling node with CPU populated but no memory populated
This patch fix one bug caused by changeset 20599. BTW, when I was working on the patch, I''m really confused by srat_detect_node() and init_cpu_to_node(). Firstly, Per my understanding to the code itself, these two functions achieve the same purpose, but I can''t understanding why srat_detect_node need be called after the CPU is up. As this is backported from kernel, so I checked linux kernel. In linux kernel, the srat_detect_node is mainly for one quirk for AMD platform, but that quirk is not implemented in Xen side. So is the quirk is really needed? Also, are there any special reason that init_cpu_to_node() can''t be called right after numa_initmem_init()? I think it will be cleaner if we setup the whole numa information in one place. Originally I suspect it is related to generic_apic_probe(), but after checking the code, I didn''t find anything related. Did I miss anything? Thanks --jyh # HG changeset patch # User Yunhong Jiang <yunhong.jiang@intel.com> # Date 1261684998 -28800 # Node ID 986dae93aea54f575202dcbaacbf3f4ff05032d4 # Parent 98c4b2498415751f97091631907d9d173ac0092f Correct handling node with CPU populated but no memory populated In changset 20599, the node that has no memory populated is marked parsed, but not online. However, if there are CPU populated in this node, the corresponding CPU mapping (i.e. the cpu_to_node) is still setup to the offline node, this will cause trouble for memory allocation. This patch changes the init_cpu_to_node() and srant_detect_node(), to considering the node is offlined situation. Now the apicid_to_node is only used to keep the mapping between cpu/node provided by BIOS, and should not be used for memory allocation anymore. One thing left is to update the cpu_to_node mapping after memory populated by memory hot-add. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/numa.c --- a/xen/arch/x86/numa.c Thu Dec 24 15:59:44 2009 +0000 +++ b/xen/arch/x86/numa.c Fri Dec 25 04:03:18 2009 +0800 @@ -35,6 +35,9 @@ unsigned char cpu_to_node[NR_CPUS] __rea unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +/* + * Keep BIOS''s CPU2node information, should not be used for memory allocaion + */ unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; @@ -288,14 +291,16 @@ static __init int numa_setup(char *opt) */ void __devinit init_cpu_to_node(void) { - int i; + int i, node; for (i = 0; i < NR_CPUS; i++) { u32 apicid = x86_cpu_to_apicid[i]; if (apicid == BAD_APICID) continue; - if (apicid_to_node[apicid] == NUMA_NO_NODE) - continue; - numa_set_node(i,apicid_to_node[apicid]); + node = apicid_to_node[apicid]; + + if ( node == NUMA_NO_NODE || !node_online(node) ) + node = 0; + numa_set_node(i, node); } } diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Dec 24 15:59:44 2009 +0000 +++ b/xen/arch/x86/setup.c Fri Dec 25 04:03:18 2009 +0800 @@ -20,6 +20,7 @@ #include <xen/rcupdate.h> #include <xen/vga.h> #include <xen/dmi.h> +#include <xen/nodemask.h> #include <public/version.h> #ifdef CONFIG_COMPAT #include <compat/platform.h> @@ -252,7 +253,7 @@ void __devinit srat_detect_node(int cpu) u32 apicid = x86_cpu_to_apicid[cpu]; node = apicid_to_node[apicid]; - if ( node == NUMA_NO_NODE ) + if ( node == NUMA_NO_NODE || !node_online(node) ) node = 0; numa_set_node(cpu, node); diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Thu Dec 24 15:59:44 2009 +0000 +++ b/xen/arch/x86/smpboot.c Fri Dec 25 04:03:18 2009 +0800 @@ -913,7 +913,7 @@ static int __devinit do_boot_cpu(int api } #else if (!per_cpu(compat_arg_xlat, cpu)) - setup_compat_arg_xlat(cpu, apicid_to_node[apicid]); + setup_compat_arg_xlat(cpu, cpu_to_node[cpu]); #endif if (!idt_tables[cpu]) { diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Thu Dec 24 15:59:44 2009 +0000 +++ b/xen/arch/x86/x86_64/mm.c Fri Dec 25 04:03:18 2009 +0800 @@ -997,7 +997,7 @@ void __init subarch_init_memory(void) } if ( setup_compat_arg_xlat(smp_processor_id(), - apicid_to_node[boot_cpu_physical_apicid]) ) + cpu_to_node[0]) ) panic("Could not setup argument translation area"); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Dec-26 07:57 UTC
[Xen-devel] Re: [PATCH] Correct handling node with CPU populated but no memory populated
On 25/12/2009 10:42, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> BTW, when I was working on the patch, I''m really confused by > srat_detect_node() and init_cpu_to_node(). > > Firstly, Per my understanding to the code itself, these two functions achieve > the same purpose, but I can''t understanding why srat_detect_node need be > called after the CPU is up. As this is backported from kernel, so I checked > linux kernel. In linux kernel, the srat_detect_node is mainly for one quirk > for AMD platform, but that quirk is not implemented in Xen side. So is the > quirk is really needed? > > Also, are there any special reason that init_cpu_to_node() can''t be called > right after numa_initmem_init()? I think it will be cleaner if we setup the > whole numa information in one place. Originally I suspect it is related to > generic_apic_probe(), but after checking the code, I didn''t find anything > related. Did I miss anything?I have no direct involvement in the NUMA code that got ported, and I think the guy that originally did the port has moved on. If you think it can be cleaned up and improved, please go ahead. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2009-Dec-28 23:26 UTC
[Xen-devel] RE: [PATCH] Correct handling node with CPU populated but no memory populated
Ok, I will take that. --jyh>-----Original Message----- >From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] >Sent: Saturday, December 26, 2009 3:57 PM >To: Jiang, Yunhong >Cc: xen-devel@lists.xensource.com >Subject: Re: [PATCH] Correct handling node with CPU populated but no memory >populated > >On 25/12/2009 10:42, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> BTW, when I was working on the patch, I''m really confused by >> srat_detect_node() and init_cpu_to_node(). >> >> Firstly, Per my understanding to the code itself, these two functions achieve >> the same purpose, but I can''t understanding why srat_detect_node need be >> called after the CPU is up. As this is backported from kernel, so I checked >> linux kernel. In linux kernel, the srat_detect_node is mainly for one quirk >> for AMD platform, but that quirk is not implemented in Xen side. So is the >> quirk is really needed? >> >> Also, are there any special reason that init_cpu_to_node() can''t be called >> right after numa_initmem_init()? I think it will be cleaner if we setup the >> whole numa information in one place. Originally I suspect it is related to >> generic_apic_probe(), but after checking the code, I didn''t find anything >> related. Did I miss anything? > >I have no direct involvement in the NUMA code that got ported, and I think >the guy that originally did the port has moved on. If you think it can be >cleaned up and improved, please go ahead. > > -- Keir >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2010-Jan-04 08:20 UTC
RE: [Xen-devel] [PATCH] Correct handling node with CPU populated but no memory populated
>Firstly, Per my understanding to the code itself, these two functions achieve the >same purpose, but I can''t understanding why srat_detect_node need be called after >the CPU is up. As this is backported from kernel, so I checked linux kernel. In linux >kernel, the srat_detect_node is mainly for one quirk for AMD platform, but that quirk >is not implemented in Xen side. So is the quirk is really needed? > >Also, are there any special reason that init_cpu_to_node() can''t be called right after >numa_initmem_init()? I think it will be cleaner if we setup the whole numa >information in one place. Originally I suspect it is related to generic_apic_probe(), >but after checking the code, I didn''t find anything related. Did I miss anything?Seems I do missed something here after checking the code again. The init_cpu_to_node() depends on the the cpu_to_apicid information, which will be achieved only after the MADT table parser. As for the srat_detect_node(), although it is in fact same to init_cpu_to_node() currently per my understanding, but it is really harmless, so I''d leave it, so that one day some quirks can be added after the CPU is identified already. So seems it does not need any clean at all. Really sorry for the wrong alerm. --jyh> >Thanks >--jyh > ># HG changeset patch ># User Yunhong Jiang <yunhong.jiang@intel.com> ># Date 1261684998 -28800 ># Node ID 986dae93aea54f575202dcbaacbf3f4ff05032d4 ># Parent 98c4b2498415751f97091631907d9d173ac0092f > >Correct handling node with CPU populated but no memory populated > >In changset 20599, the node that has no memory populated is marked parsed, but >not online. However, if there are CPU populated in this node, the corresponding CPU >mapping (i.e. the cpu_to_node) is still setup to the offline node, this will cause >trouble for memory allocation. > >This patch changes the init_cpu_to_node() and srant_detect_node(), to considering >the node is offlined situation. > >Now the apicid_to_node is only used to keep the mapping between cpu/node >provided by BIOS, and should not be used for memory allocation anymore. > >One thing left is to update the cpu_to_node mapping after memory populated by >memory hot-add. > >Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > >diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/numa.c >--- a/xen/arch/x86/numa.c Thu Dec 24 15:59:44 2009 +0000 >+++ b/xen/arch/x86/numa.c Fri Dec 25 04:03:18 2009 +0800 >@@ -35,6 +35,9 @@ unsigned char cpu_to_node[NR_CPUS] __rea > unsigned char cpu_to_node[NR_CPUS] __read_mostly = { > [0 ... NR_CPUS-1] = NUMA_NO_NODE > }; >+/* >+ * Keep BIOS''s CPU2node information, should not be used for memory allocaion >+ */ > unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { > [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE > }; >@@ -288,14 +291,16 @@ static __init int numa_setup(char *opt) > */ > void __devinit init_cpu_to_node(void) > { >- int i; >+ int i, node; > for (i = 0; i < NR_CPUS; i++) { > u32 apicid = x86_cpu_to_apicid[i]; > if (apicid == BAD_APICID) > continue; >- if (apicid_to_node[apicid] == NUMA_NO_NODE) >- continue; >- numa_set_node(i,apicid_to_node[apicid]); >+ node = apicid_to_node[apicid]; >+ >+ if ( node == NUMA_NO_NODE || !node_online(node) ) >+ node = 0; >+ numa_set_node(i, node); > } > } > >diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/setup.c >--- a/xen/arch/x86/setup.c Thu Dec 24 15:59:44 2009 +0000 >+++ b/xen/arch/x86/setup.c Fri Dec 25 04:03:18 2009 +0800 >@@ -20,6 +20,7 @@ > #include <xen/rcupdate.h> > #include <xen/vga.h> > #include <xen/dmi.h> >+#include <xen/nodemask.h> > #include <public/version.h> > #ifdef CONFIG_COMPAT > #include <compat/platform.h> >@@ -252,7 +253,7 @@ void __devinit srat_detect_node(int cpu) > u32 apicid = x86_cpu_to_apicid[cpu]; > > node = apicid_to_node[apicid]; >- if ( node == NUMA_NO_NODE ) >+ if ( node == NUMA_NO_NODE || !node_online(node) ) > node = 0; > numa_set_node(cpu, node); > >diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/smpboot.c >--- a/xen/arch/x86/smpboot.c Thu Dec 24 15:59:44 2009 +0000 >+++ b/xen/arch/x86/smpboot.c Fri Dec 25 04:03:18 2009 +0800 >@@ -913,7 +913,7 @@ static int __devinit do_boot_cpu(int api > } > #else > if (!per_cpu(compat_arg_xlat, cpu)) >- setup_compat_arg_xlat(cpu, apicid_to_node[apicid]); >+ setup_compat_arg_xlat(cpu, cpu_to_node[cpu]); > #endif > > if (!idt_tables[cpu]) { >diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/x86_64/mm.c >--- a/xen/arch/x86/x86_64/mm.c Thu Dec 24 15:59:44 2009 +0000 >+++ b/xen/arch/x86/x86_64/mm.c Fri Dec 25 04:03:18 2009 +0800 >@@ -997,7 +997,7 @@ void __init subarch_init_memory(void) > } > > if ( setup_compat_arg_xlat(smp_processor_id(), >- apicid_to_node[boot_cpu_physical_apicid]) ) >+ cpu_to_node[0]) ) > panic("Could not setup argument translation area"); > } > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel