Jiang, Yunhong
2009-Jul-08 07:43 UTC
[Xen-devel] [PATCH V2.0 2/6] Change the boot allocator function to support memory add
This patch extends the page allocation bitmap to support memory add. We take a different approach for 32-bit and 64-bit systems. On x86_32, we allocate a 512K bitmap at boot to cover the whole 16G of memory. On x86_64, we don't change the allocation at system boot; instead, we remap the allocator bitmap to a pre-defined virtual address range when a hot-add happens. So now the allocation bitmap is used in several situations: a) Before the page heap allocator is up, the allocation bitmap is used by the boot allocator. b) Normally, once the page heap allocator is up, the allocation bitmap is used to track the page allocation status. c) When memory is added, before a new page is transferred to the page heap allocator, it is tracked solely by the allocation bitmap. After it is transferred to the page heap allocator, it is handled the same as in item b. In the future, we can remove the allocation bitmap entirely and use the page_info structure to track the page allocation status. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> Signed-off-by: Wang, shane <shane.wang@intel.com> diff -r f887a0c29d43 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/arch/x86/mm.c Thu Jul 02 01:04:14 2009 +0800 @@ -155,7 +155,7 @@ struct domain *dom_xen, *dom_io; /* Frame table and its size in pages. */ struct page_info *__read_mostly frame_table; -unsigned long max_page; +unsigned long max_page, max_boot_page; unsigned long total_pages; #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) @@ -207,6 +207,130 @@ void __init init_frametable(void) memset(frame_table, 0, nr_pages << PAGE_SHIFT); } + +#if defined(__x86_64__) +static int boot_alloc_remapped = 0; +static unsigned long mapped_pages = 0; + +#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) + +/* + * Extend the boot allocator bitmap and initialize the free/alloc state. + * It avoid copy original bitmap through splitting super page mapping into + * singer page mapping. 
+ * It also use pages between start_pfn/end_pfn as page table page + */ +int arch_extend_allocation_bitmap(unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned long n_size, n_pages, i, s_map_pfn, e_map_pfn; + + if ( (start_pfn > end_pfn) || (start_pfn < max_page) ) + return -EINVAL; + + if (!boot_alloc_remapped) + { + unsigned long map_start, map_end, map_end_pfn; + unsigned long offset, o_start, o_end, o_size; + + o_start = virt_to_maddr(alloc_bitmap); + + o_size = max_page / 8; + o_size += sizeof(unsigned long); + o_size = round_pgup(o_size); + o_end = o_start + o_size; + + map_start = o_start & ~((1UL << L2_PAGETABLE_SHIFT) -1UL); + map_end = o_end & ~((1UL << L2_PAGETABLE_SHIFT) -1UL); + map_end_pfn = map_end >> PAGE_SHIFT; + + map_pages_to_xen(BOOTALLOC_VIRT_START, map_start >> PAGE_SHIFT, + ((map_end - map_start) >> PAGE_SHIFT) + 1, + PAGE_HYPERVISOR); + + mapped_pages += (map_end - map_start) >> PAGE_SHIFT; + + /* + * For pages in the end, it may not be 2M aligned + * Try to map them as 4k pages, and add some pages to be 2M aligned + * XXX A cleaner way is to make init_boot_allocator 2M aligned, do we + * care about those extra pages? 
+ */ + for (i = 0; i < (1UL << PAGETABLE_ORDER); i++) + { + if (i < (( o_end >> PAGE_SHIFT ) - map_end_pfn)) + map_pages_to_xen(BOOTALLOC_VIRT_START + + (mapped_pages << PAGE_SHIFT), + map_end_pfn + i, 1, + PAGE_HYPERVISOR); + else + { + struct page_info *pg; + pg = alloc_domheap_page(NULL, 0); + if (!pg) + return -ENOMEM; + map_pages_to_xen(BOOTALLOC_VIRT_START + + (mapped_pages << PAGE_SHIFT), + page_to_mfn(pg), 1, PAGE_HYPERVISOR); + /* Mark the new created dummy page as allocated */ + memset((void *)(BOOTALLOC_VIRT_START + (mapped_pages << PAGE_SHIFT)), 0xFFU, 1UL << PAGE_SHIFT); + } + mapped_pages++; + } + + offset = o_start & ((1UL << L2_PAGETABLE_SHIFT) - 1UL); + + alloc_bitmap = (unsigned long *)(BOOTALLOC_VIRT_START + offset); + smp_mb(); + boot_alloc_remapped = 1; + } + + n_size = end_pfn / 8; + n_size += sizeof(unsigned long); + n_size = round_pgup(n_size); + n_pages = ((unsigned long)alloc_bitmap + n_size - BOOTALLOC_VIRT_START) >> + PAGE_SHIFT; + n_pages += ((1UL << PAGETABLE_ORDER) - 1); + n_pages &= ~((1UL << PAGETABLE_ORDER) - 1); + + ASSERT(!(mapped_pages & ((1UL << PAGETABLE_ORDER) -1)) ); + if ( n_pages <= mapped_pages) + { + map_free(start_pfn, end_pfn - start_pfn + 1); + return start_pfn; + } + + s_map_pfn = start_pfn + (1UL << PAGETABLE_ORDER) - 1; + s_map_pfn = start_pfn & ~((1UL << PAGETABLE_ORDER) - 1); + e_map_pfn = s_map_pfn; + for ( i = mapped_pages; i <= n_pages; i += (1UL << PAGETABLE_ORDER) ) + { + map_pages_to_xen(BOOTALLOC_VIRT_START+ + (i << PAGE_SHIFT), + s_map_pfn + (i << PAGETABLE_ORDER ), + 1UL << PAGETABLE_ORDER, + PAGE_HYPERVISOR); + e_map_pfn += (i << PAGETABLE_ORDER); + } + + /* All new allocated page are marked as allocated initialy */ + memset((unsigned char *)(BOOTALLOC_VIRT_START + + (mapped_pages << PAGE_SHIFT)), + 0xFFU, (n_pages - mapped_pages) << PAGE_SHIFT); + mapped_pages = n_pages; + map_free(start_pfn, s_map_pfn - start_pfn); + map_free(e_map_pfn, end_pfn - e_map_pfn + 1); + + return start_pfn; +} +#else +int 
arch_extend_allocation_bitmap(unsigned long start_pfn, + unsigned long end_pfn) +{ + map_free(start_pfn, end_pfn - start_pfn + 1); + return start_pfn; +} +#endif void __init arch_init_memory(void) { @@ -4402,8 +4526,6 @@ int ptwr_do_page_fault(struct vcpu *v, u void free_xen_pagetable(void *v) { - extern int early_boot; - if ( early_boot ) return; diff -r f887a0c29d43 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/arch/x86/setup.c Thu Jul 02 01:04:14 2009 +0800 @@ -765,6 +765,9 @@ void __init __start_xen(unsigned long mb reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end); /* Initialise boot heap. */ +#if defined(CONFIG_X86_32) && defined(CONFIG_MEMORY_HOTPLUG) + max_boot_page = 16UL << (30 - PAGE_SHIFT); +#endif allocator_bitmap_end = init_boot_allocator(__pa(&_end)); #if defined(CONFIG_X86_32) xenheap_initial_phys_start = allocator_bitmap_end; diff -r f887a0c29d43 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/arch/x86/x86_32/mm.c Thu Jul 02 01:04:14 2009 +0800 @@ -42,7 +42,6 @@ static unsigned long mpt_size; void *alloc_xen_pagetable(void) { - extern int early_boot; extern unsigned long xenheap_initial_phys_start; unsigned long mfn; diff -r f887a0c29d43 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/arch/x86/x86_64/mm.c Thu Jul 02 01:04:14 2009 +0800 @@ -57,7 +57,6 @@ l2_pgentry_t __attribute__ ((__section__ void *alloc_xen_pagetable(void) { - extern int early_boot; unsigned long mfn; if ( !early_boot ) diff -r f887a0c29d43 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/common/page_alloc.c Thu Jul 02 01:05:14 2009 +0800 @@ -103,7 +103,7 @@ unsigned long *alloc_bitmap; * *_off == Bit offset within an element of the `alloc_bitmap' array. 
*/ -static void map_alloc(unsigned long first_page, unsigned long nr_pages) +void map_alloc(unsigned long first_page, unsigned long nr_pages) { unsigned long start_off, end_off, curr_idx, end_idx; @@ -131,7 +131,7 @@ static void map_alloc(unsigned long firs } } -static void map_free(unsigned long first_page, unsigned long nr_pages) +void map_free(unsigned long first_page, unsigned long nr_pages) { unsigned long start_off, end_off, curr_idx, end_idx; @@ -178,15 +178,33 @@ paddr_t __init init_boot_allocator(paddr * Allocate space for the allocation bitmap. Include an extra longword * of padding for possible overrun in map_alloc and map_free. */ - bitmap_size = max_page / 8; + bitmap_size = (max_boot_page ? max_boot_page : max_page) / 8; bitmap_size += sizeof(unsigned long); bitmap_size = round_pgup(bitmap_size); alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start); /* All allocated by default. */ memset(alloc_bitmap, ~0, bitmap_size); + max_boot_page = max_page; return bitmap_start + bitmap_size; +} + +int extend_allocation_bitmap(unsigned long spfn, unsigned long epfn) +{ + int rc; + + if (epfn < max_page) + return 0; + + rc = arch_extend_allocation_bitmap(spfn, epfn); + + if (rc < 0) + return rc; + + max_boot_page = epfn; + + return 0; } void __init init_boot_pages(paddr_t ps, paddr_t pe) @@ -235,15 +253,22 @@ void __init init_boot_pages(paddr_t ps, } } -unsigned long __init alloc_boot_pages( - unsigned long nr_pfns, unsigned long pfn_align) +/* + * allocate nr_pfn with "align" alignement between start ~ end + */ +unsigned long alloc_in_allocation_bitmap(unsigned long start, unsigned long end, + unsigned long nr_pfns, unsigned long align) { unsigned long pg, i; - - /* Search backwards to obtain highest available range. 
*/ - for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1); - pg >= first_valid_mfn; - pg = (pg + i - nr_pfns) & ~(pfn_align - 1) ) + if (start < first_valid_mfn) + start = first_valid_mfn; + + if ( (end > max_boot_page) ) + end = max_boot_page; + + for (pg = (end - nr_pfns) & ~(align - 1); + pg >= start; + pg = (pg + i - nr_pfns) & ~(align - 1) ) { for ( i = 0; i < nr_pfns; i++ ) if ( allocated_in_map(pg+i) ) @@ -258,6 +283,11 @@ unsigned long __init alloc_boot_pages( return 0; } +unsigned long __init alloc_boot_pages( + unsigned long nr_pfns, unsigned long pfn_align) +{ + return alloc_in_allocation_bitmap(first_valid_mfn, max_page, nr_pfns, pfn_align); +} /************************* @@ -834,7 +864,7 @@ int query_page_offline(unsigned long mfn * latter is not on a MAX_ORDER boundary, then we reserve the page by * not freeing it to the buddy allocator. */ -static void init_heap_pages( +void init_heap_pages( struct page_info *pg, unsigned long nr_pages) { unsigned int nid_curr, nid_prev; @@ -894,15 +924,15 @@ static unsigned long avail_heap_pages( } #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn)) -void __init end_boot_allocator(void) +void transfer_pages_to_heap_allocator(unsigned long spfn, unsigned long epfn) { unsigned long i, nr = 0; int curr_free, next_free; /* Pages that are free now go to the domain sub-allocator. 
*/ - if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) ) - map_alloc(first_valid_mfn, 1); - for ( i = first_valid_mfn; i < max_page; i++ ) + if ( (curr_free = next_free = avail_for_domheap(spfn)) ) + map_alloc(spfn, 1); + for ( i = spfn; i < epfn; i++ ) { curr_free = next_free; next_free = avail_for_domheap(i+1); @@ -918,6 +948,11 @@ void __init end_boot_allocator(void) } if ( nr ) init_heap_pages(mfn_to_page(i - nr), nr); +} + +void __init end_boot_allocator(void) +{ + transfer_pages_to_heap_allocator(first_valid_mfn, max_page); if ( !dma_bitsize && (num_online_nodes() > 1) ) { diff -r f887a0c29d43 xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/include/asm-ia64/mm.h Thu Jul 02 01:04:14 2009 +0800 @@ -274,6 +274,12 @@ static inline int get_page_and_type(stru } return rc; +} + +int arch_extend_allocation_bitmap(unsigned long start_pfn, + unsigned long end_pfn) +{ + return -ENOSYS; } #define set_machinetophys(_mfn, _pfn) do { } while(0); diff -r f887a0c29d43 xen/include/asm-x86/config.h --- a/xen/include/asm-x86/config.h Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/include/asm-x86/config.h Thu Jul 02 01:04:14 2009 +0800 @@ -162,7 +162,9 @@ extern unsigned int video_mode, video_fl * High read-only compatibility machine-to-phys translation table. * 0xffff828c80000000 - 0xffff828cbfffffff [1GB, 2^30 bytes, PML4:261] * Xen text, static data, bss. - * 0xffff828cc0000000 - 0xffff82ffffffffff [461GB, PML4:261] + * 0xffff828cc0000000 - 0xffff82cfffffffff [1GB, 2^30 bytes, PML4:261] + * Boot allocator bitmap + * 0xffff828cd0000000 - 0xffff82ffffffffff [460GB, PML4:261] * Reserved for future use. * 0xffff830000000000 - 0xffff83ffffffffff [1TB, 2^40 bytes, PML4:262-263] * 1:1 direct mapping of all physical memory. @@ -230,6 +232,9 @@ extern unsigned int video_mode, video_fl /* Slot 261: xen text, static data and bss (1GB). 
*/ #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END) #define XEN_VIRT_END (XEN_VIRT_START + (1UL << 30)) +/* Slot 261: Boot allocator bitmap (1GB) */ +#define BOOTALLOC_VIRT_START XEN_VIRT_END +#define BOOTALLOC_VIRT_END (BOOTALLOC_VIRT_START + (1UL << 30)) /* Slot 262-263: A direct 1:1 mapping of all of physical memory. */ #define DIRECTMAP_VIRT_START (PML4_ADDR(262)) #define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2) diff -r f887a0c29d43 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/include/asm-x86/mm.h Thu Jul 02 01:04:14 2009 +0800 @@ -266,9 +266,13 @@ extern void share_xen_page_with_privileg struct page_info *page, int readonly); extern struct page_info *frame_table; -extern unsigned long max_page; +extern unsigned long max_page, max_boot_page; extern unsigned long total_pages; + +extern int early_boot; void init_frametable(void); +int arch_extend_allocation_bitmap(unsigned long start_pfn, + unsigned long end_pfn); int free_page_type(struct page_info *page, unsigned long type, int preemptible); diff -r f887a0c29d43 xen/include/xen/mm.h --- a/xen/include/xen/mm.h Thu Jul 02 01:04:12 2009 +0800 +++ b/xen/include/xen/mm.h Thu Jul 02 01:04:14 2009 +0800 @@ -38,10 +38,18 @@ struct page_info; /* Boot-time allocator. Turns into generic allocator after bootstrap. 
*/ paddr_t init_boot_allocator(paddr_t bitmap_start); +void map_free(unsigned long first_page, unsigned long nr_pages); +void map_alloc(unsigned long first_page, unsigned long nr_pages); void init_boot_pages(paddr_t ps, paddr_t pe); unsigned long alloc_boot_pages( unsigned long nr_pfns, unsigned long pfn_align); +unsigned long alloc_in_allocation_bitmap(unsigned long start, unsigned long end, + unsigned long nr_pfns, unsigned long align); +void init_heap_pages( + struct page_info *pg, unsigned long nr_pages); void end_boot_allocator(void); +void transfer_pages_to_heap_allocator(unsigned long spfn, unsigned long epfn); +int extend_allocation_bitmap(unsigned long spfn, unsigned long epfn); /* Xen suballocator. These functions are interrupt-safe. */ void init_xenheap_pages(paddr_t ps, paddr_t pe); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel