Dave McCracken
2009-Nov-04 13:48 UTC
[Xen-devel] [PATCH xen.git] Add hugepage support to balloon driver
This patch adds hugepage support to the balloon driver. It is activated by specifying "balloon_hugepages" on the kernel command line. Once activated, the balloon driver will work entirely in hugepage sized chunks. If, when returning pages, it finds a hugepage that is not contiguous at the machine level, it will return each underlying page separately. When this page is later repopulated it will be contiguous. Signed-off-by: Dave McCracken <dave.mccracken@oracle.com> -------- balloon.c | 171 +++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 125 insertions(+), 46 deletions(-) --- 2.6-xen/drivers/xen/balloon.c 2009-10-29 17:48:30.000000000 -0500 +++ 2.6-xen-balloon/drivers/xen/balloon.c 2009-10-29 19:14:33.000000000 -0500 @@ -59,7 +59,7 @@ #include <xen/features.h> #include <xen/page.h> -#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10)) #define BALLOON_CLASS_NAME "xen_memory" @@ -85,6 +85,14 @@ static int register_balloon(struct sys_d static struct balloon_stats balloon_stats; +/* + * Work in pages of this order. Can be either 0 for normal pages + * or 9 for hugepages. 
+ */ +static int balloon_order; +static unsigned long balloon_npages; +static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)]; + /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; @@ -113,10 +121,41 @@ static struct timer_list balloon_timer; static void scrub_page(struct page *page) { #ifdef CONFIG_XEN_SCRUB_PAGES - clear_highpage(page); + int i; + + for (i = 0; i < balloon_npages; i++) + clear_highpage(page++); #endif } +static void free_discontig_frame(void) +{ + int rc; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .domid = DOMID_SELF, + .nr_extents = balloon_npages, + .extent_order = 0 + }; + + set_xen_guest_handle(reservation.extent_start, discontig_frame_list); + rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(rc != balloon_npages); +} + +static unsigned long shrink_frame(unsigned long nr_pages) +{ + unsigned long i, j; + + for (i = 0, j = 0; i < nr_pages; i++, j++) { + if (frame_list[i] == 0) + j++; + if (i != j) + frame_list[i] = frame_list[j]; + } + return i; +} + /* balloon_append: add the given page to the balloon. 
*/ static void balloon_append(struct page *page) { @@ -190,12 +229,11 @@ static unsigned long current_target(void static int increase_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; + unsigned long pfn, mfn, i, j, flags; struct page *page; long rc; struct xen_memory_reservation reservation = { .address_bits = 0, - .extent_order = 0, .domid = DOMID_SELF }; @@ -207,12 +245,14 @@ static int increase_reservation(unsigned page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { BUG_ON(page == NULL); - frame_list[i] = page_to_pfn(page);; + frame_list[i] = page_to_pfn(page); page = balloon_next_page(page); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; + reservation.extent_order = balloon_order; + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); if (rc < 0) goto out; @@ -222,19 +262,22 @@ static int increase_reservation(unsigned BUG_ON(page == NULL); pfn = page_to_pfn(page); + mfn = frame_list[i]; BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && phys_to_machine_mapping_valid(pfn)); - set_phys_to_machine(pfn, frame_list[i]); + for (j = 0; j < balloon_npages; j++, pfn++, mfn++) { + set_phys_to_machine(pfn, mfn); - /* Link back into the page tables if not highmem. */ - if (pfn < max_low_pfn) { - int ret; - ret = HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - mfn_pte(frame_list[i], PAGE_KERNEL), - 0); - BUG_ON(ret); + /* Link back into the page tables if not highmem. */ + if (pfn < max_low_pfn) { + int ret; + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(mfn, PAGE_KERNEL), + 0); + BUG_ON(ret); + } } /* Relinquish the page back to the allocator. 
*/ @@ -253,13 +296,13 @@ static int increase_reservation(unsigned static int decrease_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; + unsigned long pfn, lpfn, mfn, i, j, flags; struct page *page; int need_sleep = 0; - int ret; + int discontig, discontig_free; + int ret; struct xen_memory_reservation reservation = { .address_bits = 0, - .extent_order = 0, .domid = DOMID_SELF }; @@ -267,7 +310,7 @@ static int decrease_reservation(unsigned nr_pages = ARRAY_SIZE(frame_list); for (i = 0; i < nr_pages; i++) { - if ((page = alloc_page(GFP_BALLOON)) == NULL) { + if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) { nr_pages = i; need_sleep = 1; break; @@ -277,14 +320,6 @@ static int decrease_reservation(unsigned frame_list[i] = pfn_to_mfn(pfn); scrub_page(page); - - if (!PageHighMem(page)) { - ret = HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - __pte_ma(0), 0); - BUG_ON(ret); - } - } /* Ensure that ballooned highmem pages don't have kmaps. */ @@ -295,18 +330,39 @@ static int decrease_reservation(unsigned /* No more mappings: invalidate P2M and add to balloon. 
*/ for (i = 0; i < nr_pages; i++) { - pfn = mfn_to_pfn(frame_list[i]); - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + mfn = frame_list[i]; + lpfn = pfn = mfn_to_pfn(mfn); balloon_append(pfn_to_page(pfn)); + discontig_free = 0; + for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { + if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn) + discontig_free = 1; + + set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); + if (!PageHighMem(page)) { + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(lpfn << PAGE_SHIFT), + __pte_ma(0), 0); + BUG_ON(ret); + } + } + if (discontig_free) { + free_discontig_frame(); + frame_list[i] = 0; + discontig = 1; + } } + balloon_stats.current_pages -= nr_pages; + + if (discontig) + nr_pages = shrink_frame(nr_pages); set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; + reservation.extent_order = balloon_order; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); BUG_ON(ret != nr_pages); - balloon_stats.current_pages -= nr_pages; - spin_unlock_irqrestore(&xen_reservation_lock, flags); return need_sleep; @@ -374,7 +430,7 @@ static void watch_target(struct xenbus_w /* The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
*/ - balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); + balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order)); } static int balloon_init_watcher(struct notifier_block *notifier, @@ -399,9 +455,12 @@ static int __init balloon_init(void) if (!xen_pv_domain()) return -ENODEV; - pr_info("xen_balloon: Initialising balloon driver.\n"); + pr_info("xen_balloon: Initialising balloon driver with page order %d.\n", + balloon_order); + + balloon_npages = 1 << balloon_order; - balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); + balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -414,7 +473,7 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) { if (page_is_ram(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) @@ -464,16 +523,20 @@ static int dealloc_pte_fn(pte_t *pte, st struct page **alloc_empty_pages_and_pagevec(int nr_pages) { struct page *page, **pagevec; - int i, ret; + int npages; + int i, j, ret; + + /* Round up to next number of balloon_order pages */ + npages = (nr_pages + (balloon_npages-1)) >> balloon_order; - pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); + pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL); if (pagevec == NULL) return NULL; for (i = 0; i < nr_pages; i++) { void *v; - page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD); + page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order); if (page == NULL) goto err; @@ -484,8 +547,8 @@ struct page **alloc_empty_pages_and_page v = page_address(page); ret = apply_to_page_range(&init_mm, (unsigned long)v, - PAGE_SIZE, dealloc_pte_fn, - 
NULL); + PAGE_SIZE << balloon_order, + dealloc_pte_fn, NULL); if (ret != 0) { mutex_unlock(&balloon_mutex); @@ -493,8 +556,10 @@ struct page **alloc_empty_pages_and_page __free_page(page); goto err; } + for (j = 0; j < balloon_npages; j++) + pagevec[(i<<balloon_order)+j] = page++; - totalram_pages = --balloon_stats.current_pages; + totalram_pages = balloon_stats.current_pages -= balloon_npages; mutex_unlock(&balloon_mutex); } @@ -507,7 +572,7 @@ struct page **alloc_empty_pages_and_page err: mutex_lock(&balloon_mutex); while (--i >= 0) - balloon_append(pagevec[i]); + balloon_append(pagevec[i << balloon_order]); mutex_unlock(&balloon_mutex); kfree(pagevec); pagevec = NULL; @@ -517,15 +582,21 @@ EXPORT_SYMBOL_GPL(alloc_empty_pages_and_ void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) { + struct page *page; int i; + int npages; if (pagevec == NULL) return; + /* Round up to next number of balloon_order pages */ + npages = (nr_pages + (balloon_npages-1)) >> balloon_order; + mutex_lock(&balloon_mutex); for (i = 0; i < nr_pages; i++) { - BUG_ON(page_count(pagevec[i]) != 1); - balloon_append(pagevec[i]); + page = pagevec[i << balloon_order]; + BUG_ON(page_count(page) != 1); + balloon_append(page); } mutex_unlock(&balloon_mutex); @@ -535,6 +606,14 @@ void free_empty_pages_and_pagevec(struct } EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec); +static int __init balloon_parse_huge(char *s) +{ + balloon_order = 9; + return 1; +} + +__setup("balloon_hugepages", balloon_parse_huge); + #define BALLOON_SHOW(name, format, args...) 
\ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, \ @@ -568,7 +647,7 @@ static ssize_t store_target_kb(struct sy target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; - balloon_set_new_target(target_bytes >> PAGE_SHIFT); + balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order)); return count; } @@ -582,7 +661,7 @@ static ssize_t show_target(struct sys_de { return sprintf(buf, "%llu\n", (unsigned long long)balloon_stats.target_pages - << PAGE_SHIFT); + << (PAGE_SHIFT + balloon_order)); } static ssize_t store_target(struct sys_device *dev, @@ -598,7 +677,7 @@ static ssize_t store_target(struct sys_d target_bytes = memparse(buf, &endchar); - balloon_set_new_target(target_bytes >> PAGE_SHIFT); + balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order)); return count; } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel