Jiang, Yunhong
2009-Jun-28 09:27 UTC
[Xen-devel] [PATCH 4/6] Change the m2p table initialization to support memory add
When new memory is added to the system, we need to update the m2p table to cover the new memory range. This patch separates the setup_m2p_table() from paging_init, and makes it work for memory add. When memory is added, we try to allocate the memory from the new added memory range, so that page tables for m2p table will be in the same node as new memory range. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Signed-off-by: Wang, shane <shane.wang@intel.com> diff -r bf0bbbcf7caf xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Thu Jun 25 21:42:54 2009 +0800 +++ b/xen/arch/x86/x86_32/mm.c Sun Jun 28 02:43:15 2009 +0800 @@ -67,10 +67,55 @@ void *do_page_walk(struct vcpu *v, unsig return NULL; } +unsigned long mpt_pages = 0; + +int setup_m2p_table(unsigned long spfn, unsigned long epfn, int hotplug) +{ + unsigned long i, pg; + struct page_info *l1_pg; + + if (spfn > epfn) + return -EINVAL; + ASSERT(!(mpt_pages & ((1UL << PAGETABLE_ORDER) - 1))); + mpt_size = (epfn * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; + mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); + if ( (mpt_size >> PAGE_SHIFT) <= mpt_pages) + return 0; + + for ( i= (mpt_pages >> PAGETABLE_ORDER); + i < (mpt_size >> L2_PAGETABLE_SHIFT); + i++ ) + { + if (hotplug) + { + pg = alloc_boot_page_range(spfn, epfn, PAGETABLE_ORDER, PAGETABLE_ORDER); + l1_pg = pg ? mfn_to_page(pg) : NULL; + } + else + l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0); + + if (!l1_pg) + return -ENOMEM; + + l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i], + l2e_from_page(l1_pg, PAGE_HYPERVISOR | _PAGE_PSE)); + /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ + l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i], + l2e_from_page( + l1_pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW)); + mpt_pages += (1UL << PAGETABLE_ORDER); + } + + /* Fill with an obvious debug pattern. 
*/ + for ( i = spfn; i < (mpt_size /BYTES_PER_LONG); i++) + set_gpfn_from_mfn(i, 0x55555555); + + return 0; +} + void __init paging_init(void) { unsigned long v; - struct page_info *pg; int i; if ( cpu_has_pge ) @@ -93,23 +138,8 @@ void __init paging_init(void) * Allocate and map the machine-to-phys table and create read-only mapping * of MPT for guest-OS use. */ - mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; - mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); - for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) - { - if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL ) + if (setup_m2p_table(0, max_page, 0) == -ENOMEM) panic("Not enough memory to bootstrap Xen.\n"); - l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i], - l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE)); - /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ - l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i], - l2e_from_page( - pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW)); - } - - /* Fill with an obvious debug pattern. */ - for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++) - set_gpfn_from_mfn(i, 0x55555555); /* Create page tables for ioremap()/map_domain_page_global(). 
*/ for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) diff -r bf0bbbcf7caf xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Thu Jun 25 21:42:54 2009 +0800 +++ b/xen/arch/x86/x86_64/mm.c Sun Jun 28 02:42:26 2009 +0800 @@ -143,69 +143,92 @@ void *do_page_walk(struct vcpu *v, unsig return mfn_to_virt(mfn) + (addr & ~PAGE_MASK); } -void __init paging_init(void) +/* The total pages allocated for m2p table */ +unsigned long mpt_pages, compat_mpt_pages; +/* XXX TBD to support discontinous m2p table */ +int setup_m2p_table(unsigned long spfn, unsigned long epfn, int hotplug) { unsigned long i, mpt_size, va; unsigned int memflags; - l3_pgentry_t *l3_ro_mpt; + unsigned long pg; + l3_pgentry_t *l3_ro_mpt, l3_ro_e; l2_pgentry_t *l2_ro_mpt = NULL; - struct page_info *l1_pg, *l2_pg, *l3_pg; - - /* Create user-accessible L2 directory to map the MPT for guests. */ - if ( (l3_pg = alloc_domheap_page(NULL, 0)) == NULL ) - goto nomem; - l3_ro_mpt = page_to_virt(l3_pg); - clear_page(l3_ro_mpt); - l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)], - l4e_from_page(l3_pg, __PAGE_HYPERVISOR | _PAGE_USER)); - - /* - * Allocate and map the machine-to-phys table. - * This also ensures L3 is present for fixmaps. 
- */ - mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; + struct page_info *l1_pg, *l2_pg; + + mpt_size = (epfn * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); - for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) - { - BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1)); + + /* Has already setup */ + if ( (mpt_size >> PAGE_SHIFT) <= mpt_pages ) + return 0; + + l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset( + RO_MPT_VIRT_START)]); + + for ( i = mpt_pages >> PAGETABLE_ORDER; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++) + { va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT); memflags = MEMF_node(phys_to_nid(i << - (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT))); - - if ( cpu_has_page1gb && - !((unsigned long)l2_ro_mpt & ~PAGE_MASK) && - (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) && - (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER, - memflags)) != NULL ) - { - map_pages_to_xen( - RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), - page_to_mfn(l1_pg), - 1UL << (2 * PAGETABLE_ORDER), - PAGE_HYPERVISOR); - memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), - 0x77, 1UL << L3_PAGETABLE_SHIFT); - - ASSERT(!l2_table_offset(va)); - /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ - l3e_write(&l3_ro_mpt[l3_table_offset(va)], - l3e_from_page(l1_pg, - /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); - i += (1UL << PAGETABLE_ORDER) - 1; - continue; - } - - if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, - memflags)) == NULL ) + (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT))); + + if (cpu_has_page1gb && + !(i & ((1UL << PAGETABLE_ORDER) -1)) && + (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER)) + { + if (hotplug) + { + pg = alloc_boot_page_range(spfn, epfn, + 1UL << 2* PAGETABLE_ORDER, + 1UL << 2*PAGETABLE_ORDER); + l1_pg = pg ? 
mfn_to_page(pg) : NULL; + } + + else + l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER, + memflags); + if (l1_pg) + { + map_pages_to_xen( + RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), + page_to_mfn(l1_pg), + 1UL << (2 * PAGETABLE_ORDER), + PAGE_HYPERVISOR); + memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), + 0x77, 1UL << L3_PAGETABLE_SHIFT); + + ASSERT(!l2_table_offset(va)); + /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ + l3e_write(&l3_ro_mpt[l3_table_offset(va)], + l3e_from_pfn(l1_pg, + /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); + i += (1UL << PAGETABLE_ORDER) - 1; + mpt_pages += (1UL << (2 * PAGETABLE_ORDER)); + continue; + } + } + + if (hotplug) + { + pg = alloc_boot_page_range(spfn, epfn, + 1UL << PAGETABLE_ORDER, + 1UL << PAGETABLE_ORDER); + l1_pg = pg ? mfn_to_page(pg) : NULL; + } + else + l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, memflags); + + if (!l1_pg) goto nomem; map_pages_to_xen( RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), - page_to_mfn(l1_pg), + page_to_mfn(l1_pg), 1UL << PAGETABLE_ORDER, PAGE_HYPERVISOR); memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55, 1UL << L2_PAGETABLE_SHIFT); - if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) ) + mpt_pages += (1UL << PAGETABLE_ORDER); + l3_ro_e = l3_ro_mpt[l3_table_offset(va)]; + if (!(l3e_get_flags(l3_ro_e) & _PAGE_PRESENT)) { if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL ) goto nomem; @@ -213,26 +236,21 @@ void __init paging_init(void) clear_page(l2_ro_mpt); l3e_write(&l3_ro_mpt[l3_table_offset(va)], l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); - ASSERT(!l2_table_offset(va)); - } + } + else + l2_ro_mpt = l3e_to_l2e(l3_ro_e); /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. 
*/ - l2e_write(l2_ro_mpt, l2e_from_page( - l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); + l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_from_pfn( + page_to_mfn(l1_pg), /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); l2_ro_mpt++; } - /* Create user-accessible L2 directory to map the MPT for compat guests. */ - BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) != - l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)); l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset( - HIRO_COMPAT_MPT_VIRT_START)]); - if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL ) - goto nomem; - compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg); - clear_page(l2_ro_mpt); - l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)], - l3e_from_page(l2_pg, __PAGE_HYPERVISOR)); + HIRO_COMPAT_MPT_VIRT_START)]); + + l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]); l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START); + /* Allocate and map the compatibility mode machine-to-phys table. */ mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1)); if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START ) @@ -240,12 +258,27 @@ void __init paging_init(void) mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END ) m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size; - for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) + + /* Has already setup */ + if ( (mpt_size >> PAGE_SHIFT) <= compat_mpt_pages ) + return 0; + + for ( i = (compat_mpt_pages >> PAGETABLE_ORDER); + i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { memflags = MEMF_node(phys_to_nid(i << (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT))); - if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, - memflags)) == NULL ) + if (hotplug) + { + pg = alloc_boot_page_range(spfn, epfn, + 1UL <<PAGETABLE_ORDER, + 1UL << PAGETABLE_ORDER); + l1_pg = pg ? 
mfn_to_page(pg) : NULL; + } + else + l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, + memflags); + if (!l1_pg) goto nomem; map_pages_to_xen( RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), @@ -259,8 +292,42 @@ void __init paging_init(void) /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */ l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT)); l2_ro_mpt++; - } - + compat_mpt_pages += (1UL << PAGETABLE_ORDER); + } + return 0; + +nomem: + return -ENOMEM; +} + +void __init paging_init(void) +{ + l3_pgentry_t *l3_ro_mpt; + l2_pgentry_t *l2_ro_mpt = NULL; + struct page_info *l2_pg, *l3_pg; + + /* Create user-accessible L3 directory to map the MPT for guests. */ + if ( (l3_pg = alloc_domheap_page(NULL, 0)) == NULL ) + goto nomem; + l3_ro_mpt = page_to_virt(l3_pg); + clear_page(l3_ro_mpt); + l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)], + l4e_from_page(l3_pg, __PAGE_HYPERVISOR | _PAGE_USER)); + + /* Create user-accessible L2 directory to map the MPT for compat guests. */ + BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) != + l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)); l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset( + HIRO_COMPAT_MPT_VIRT_START)]); + if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL ) + goto nomem; + compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg); + clear_page(l2_ro_mpt); + l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + l3e_from_page(l2_pg, __PAGE_HYPERVISOR)); + + if (setup_m2p_table(0, max_page, 0)) + goto nomem; /* Set up linear page table mapping. 
*/ l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)], l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR)); diff -r bf0bbbcf7caf xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Thu Jun 25 21:42:54 2009 +0800 +++ b/xen/include/asm-x86/mm.h Sun Jun 28 02:42:31 2009 +0800 @@ -506,6 +506,7 @@ unsigned int domain_clamp_alloc_bitsize( # define domain_clamp_alloc_bitsize(d, b) (b) #endif +int setup_m2p_table(unsigned long spfn, unsigned long epfn, int hotplug); unsigned long domain_get_maximum_gpfn(struct domain *d); extern struct domain *dom_xen, *dom_io; /* for vmcoreinfo */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel