Daniel Kiper
2010-Dec-29 17:07 UTC
[Xen-devel] [PATCH R2 7/7] xen/balloon: Xen memory balloon driver with memory hotplug support
Features and fixes: - new version of memory hotplug patch which supports among others memory allocation policies during errors (try until success or stop at first error), - this version of patch was tested with tmem (selfballooning and frontswap) and works very well with it, - some other minor fixes. Signed-off-by: Daniel Kiper <dkiper@net-space.pl> --- drivers/xen/Kconfig | 10 ++ drivers/xen/balloon.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 221 insertions(+), 11 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 60d71e9..ada8ef5 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -9,6 +9,16 @@ config XEN_BALLOON the system to expand the domain''s memory allocation, or alternatively return unneeded memory to the system. +config XEN_BALLOON_MEMORY_HOTPLUG + bool "Xen memory balloon driver with memory hotplug support" + default n + depends on XEN_BALLOON && MEMORY_HOTPLUG + help + Xen memory balloon driver with memory hotplug support allows expanding + memory available for the system above limit declared at system startup. + It is very useful on critical systems which require long run without + rebooting. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f105e67..73abacd 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -6,6 +6,7 @@ * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. 
Smith, IBM Corporation + * Copyright (c) 2010 Daniel Kiper * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -44,6 +45,7 @@ #include <linux/list.h> #include <linux/sysdev.h> #include <linux/gfp.h> +#include <linux/memory.h> #include <asm/page.h> #include <asm/pgalloc.h> @@ -79,6 +81,9 @@ enum bp_state { BP_HUNGRY }; +#define MH_POLICY_TRY_UNTIL_SUCCESS 0 +#define MH_POLICY_STOP_AT_FIRST_ERROR 1 + struct balloon_stats { /* We aim for ''current allocation'' == ''target allocation''. */ unsigned long current_pages; @@ -88,6 +93,10 @@ struct balloon_stats { unsigned long balloon_high; unsigned long schedule_delay; unsigned long max_schedule_delay; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + unsigned long boot_max_pfn; + unsigned long mh_policy; +#endif }; static DEFINE_MUTEX(balloon_mutex); @@ -201,18 +210,195 @@ static void update_schedule_delay(enum bp_state state) balloon_stats.schedule_delay = new_schedule_delay; } -static unsigned long current_target(void) +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +static int allocate_memory_resource(struct resource **r, unsigned long nr_pages) { - unsigned long target = balloon_stats.target_pages; + int rc; + resource_size_t r_min, r_size; + + /* + * Look for first unused memory region starting at page + * boundary. Skip last memory section created at boot time + * becuase it may contains unused memory pages with PG_reserved + * bit not set (online_pages require PG_reserved bit set). 
+ */ + + *r = kzalloc(sizeof(struct resource), GFP_KERNEL); - target = min(target, - balloon_stats.current_pages + - balloon_stats.balloon_low + - balloon_stats.balloon_high); + if (!*r) + return -ENOMEM; - return target; + (*r)->name = "System RAM"; + (*r)->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + r_min = PFN_PHYS(section_nr_to_pfn(pfn_to_section_nr(balloon_stats.boot_max_pfn) + 1)); + r_size = nr_pages << PAGE_SHIFT; + + rc = allocate_resource(&iomem_resource, *r, r_size, r_min, + ULONG_MAX, PAGE_SIZE, NULL, NULL); + + if (rc < 0) { + kfree(*r); + *r = NULL; + } + + return rc; } +static void adjust_memory_resource(struct resource **r, unsigned long nr_pages) +{ + if ((*r)->end + 1 - (nr_pages << PAGE_SHIFT) == (*r)->start) { + BUG_ON(release_resource(*r) < 0); + kfree(*r); + *r = NULL; + return; + } + + BUG_ON(adjust_resource(*r, (*r)->start, (*r)->end + 1 - (*r)->start - + (nr_pages << PAGE_SHIFT)) < 0); +} + +static int allocate_additional_memory(struct resource *r, unsigned long nr_pages) +{ + int rc; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + unsigned long flags, i, pfn, pfn_start; + + if (!nr_pages) + return 0; + + pfn_start = PFN_UP(r->end) - nr_pages; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + for (i = 0, pfn = pfn_start; i < nr_pages; ++i, ++pfn) + frame_list[i] = pfn; + + set_xen_guest_handle(reservation.extent_start, frame_list); + reservation.nr_extents = nr_pages; + + spin_lock_irqsave(&xen_reservation_lock, flags); + + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + + if (rc <= 0) + return (rc < 0) ? 
rc : -ENOMEM; + + for (i = 0, pfn = pfn_start; i < rc; ++i, ++pfn) { + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && + phys_to_machine_mapping_valid(pfn)); + set_phys_to_machine(pfn, frame_list[i]); + } + + spin_unlock_irqrestore(&xen_reservation_lock, flags); + + return rc; +} + +static void hotplug_allocated_memory(struct resource **r) +{ + int nid, rc; + resource_size_t r_size; + struct memory_block *mem; + unsigned long pfn; + + r_size = (*r)->end + 1 - (*r)->start; + nid = memory_add_physaddr_to_nid((*r)->start); + + rc = add_registered_memory(nid, (*r)->start, r_size); + + if (rc) { + pr_err("%s: add_registered_memory: Memory hotplug failed: %i\n", + __func__, rc); + balloon_stats.target_pages = balloon_stats.current_pages; + *r = NULL; + return; + } + + if (xen_pv_domain()) + for (pfn = PFN_DOWN((*r)->start); pfn < PFN_UP((*r)->end); ++pfn) + if (!PageHighMem(pfn_to_page(pfn))) + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(pfn_to_mfn(pfn), PAGE_KERNEL), 0)); + + rc = online_pages(PFN_DOWN((*r)->start), r_size >> PAGE_SHIFT); + + if (rc) { + pr_err("%s: online_pages: Failed: %i\n", __func__, rc); + balloon_stats.target_pages = balloon_stats.current_pages; + *r = NULL; + return; + } + + for (pfn = PFN_DOWN((*r)->start); pfn < PFN_UP((*r)->end); pfn += PAGES_PER_SECTION) { + mem = find_memory_block(__pfn_to_section(pfn)); + BUG_ON(!mem); + BUG_ON(!present_section_nr(mem->phys_index)); + mutex_lock(&mem->state_mutex); + mem->state = MEM_ONLINE; + mutex_unlock(&mem->state_mutex); + } + + balloon_stats.current_pages += r_size >> PAGE_SHIFT; + + *r = NULL; +} + +static enum bp_state request_additional_memory(long credit) +{ + int rc; + static struct resource *r; + static unsigned long pages_left; + + if ((credit <= 0 || balloon_stats.balloon_low || + balloon_stats.balloon_high) && !r) + return BP_DONE; + + if (!r) { + rc = allocate_memory_resource(&r, credit); + + if (rc) + return BP_ERROR; + + pages_left = 
credit; + } + + rc = allocate_additional_memory(r, pages_left); + + if (rc < 0) { + if (balloon_stats.mh_policy == MH_POLICY_TRY_UNTIL_SUCCESS) + return BP_ERROR; + + adjust_memory_resource(&r, pages_left); + + if (!r) + return BP_ERROR; + } else { + pages_left -= rc; + + if (pages_left) + return BP_HUNGRY; + } + + hotplug_allocated_memory(&r); + + return BP_DONE; +} +#else +static enum bp_state request_additional_memory(long credit) +{ + if (balloon_stats.balloon_low && balloon_stats.balloon_high && + balloon_stats.target_pages > balloon_stats.current_pages) + balloon_stats.target_pages = balloon_stats.current_pages; + return BP_DONE; +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ + static enum bp_state increase_reservation(unsigned long nr_pages) { enum bp_state state = BP_DONE; @@ -358,15 +544,17 @@ static enum bp_state decrease_reservation(unsigned long nr_pages) */ static void balloon_process(struct work_struct *work) { - enum bp_state rc, state = BP_DONE; + enum bp_state rc, state; long credit; mutex_lock(&balloon_mutex); do { - credit = current_target() - balloon_stats.current_pages; + credit = balloon_stats.target_pages - balloon_stats.current_pages; + + state = request_additional_memory(credit); - if (credit > 0) { + if (credit > 0 && state == BP_DONE) { rc = increase_reservation(credit); state = (rc == BP_ERROR) ? BP_ERROR : state; } @@ -458,6 +646,11 @@ static int __init balloon_init(void) balloon_stats.schedule_delay = 1; balloon_stats.max_schedule_delay = 32; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + balloon_stats.boot_max_pfn = max_pfn; + balloon_stats.mh_policy = MH_POLICY_STOP_AT_FIRST_ERROR; +#endif + register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. 
*/ @@ -501,6 +694,10 @@ BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); static SYSDEV_ULONG_ATTR(schedule_delay, 0644, balloon_stats.schedule_delay); static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +static SYSDEV_ULONG_ATTR(memory_hotplug_policy, 0644, balloon_stats.mh_policy); +#endif + static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { @@ -563,7 +760,10 @@ static struct sysdev_attribute *balloon_attrs[] = { &attr_target_kb, &attr_target, &attr_schedule_delay.attr, - &attr_max_schedule_delay.attr + &attr_max_schedule_delay.attr, +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + &attr_memory_hotplug_policy.attr +#endif }; static struct attribute *balloon_info_attrs[] = { -- 1.4.4.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dave Hansen
2011-Jan-03 22:33 UTC
[Xen-devel] Re: [PATCH R2 7/7] xen/balloon: Xen memory balloon driver with memory hotplug support
On Wed, 2010-12-29 at 18:07 +0100, Daniel Kiper wrote: > +config XEN_BALLOON_MEMORY_HOTPLUG > + bool "Xen memory balloon driver with memory hotplug support" > + default n > + depends on XEN_BALLOON && MEMORY_HOTPLUG > + help > + Xen memory balloon driver with memory hotplug support allows expanding > + memory available for the system above limit declared at system startup. > + It is very useful on critical systems which require long run without > + rebooting. This might be better phrased as "Memory hotplug support for Xen balloon driver". It might otherwise confuse people about whether they're seeing some kind of choice or an _enhancement_ to the existing driver. Also, why bother even making this a config option? What are the downsides if it was always compiled in? You could even make it a non-prompting Kconfig option and just automatically turn it on with XEN_BALLOON && MEMORY_HOTPLUG. > +static int allocate_memory_resource(struct resource **r, unsigned long nr_pages) > { > - unsigned long target = balloon_stats.target_pages; > + int rc; > + resource_size_t r_min, r_size; > + > + /* > + * Look for first unused memory region starting at page > + * boundary. Skip last memory section created at boot time > + * becuase it may contains unused memory pages with PG_reserved > + * bit not set (online_pages require PG_reserved bit set). 
> + */ > + > + *r = kzalloc(sizeof(struct resource), GFP_KERNEL); > > - target = min(target, > - balloon_stats.current_pages + > - balloon_stats.balloon_low + > - balloon_stats.balloon_high); > + if (!*r) > + return -ENOMEM; > > - return target; > + (*r)->name = "System RAM"; > + (*r)->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > + r_min = PFN_PHYS(section_nr_to_pfn(pfn_to_section_nr(balloon_stats.boot_max_pfn) + 1)); > + r_size = nr_pages << PAGE_SHIFT; > + > + rc = allocate_resource(&iomem_resource, *r, r_size, r_min, > + ULONG_MAX, PAGE_SIZE, NULL, NULL); > + > + if (rc < 0) { > + kfree(*r); > + *r = NULL; > + } > + > + return rc; > } The double-pointer stuff here ends up looking a little funky. Is there any reason you don't just use ERR_PTRs? That might look a bit more sane. > +static void adjust_memory_resource(struct resource **r, unsigned long nr_pages) > +{ > + if ((*r)->end + 1 - (nr_pages << PAGE_SHIFT) == (*r)->start) { > + BUG_ON(release_resource(*r) < 0); In some kernels, people do: #define BUG_ON(...) do{}while(0) to save space. If anyone ever does that with this code, it'll break horribly. It's also hard to read these. So, please break logic actions _out_ of the BUG_ON() arguments. That's repeated in quite a few places in here. Make sure to go get them all. It also isn't evident what this patch set is trying to do until you get down to this 7/7 patch. You might want to put a more complete description in 0/7. -- Dave _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Kiper
2011-Jan-11 17:51 UTC
[Xen-devel] Re: [PATCH R2 7/7] xen/balloon: Xen memory balloon driver with memory hotplug support
Hi everybody, Sorry for the delays. I could not fully devote my time to this project :-((( (especially at the beginning of this year). I am going to post all replies and a new version of the patch next week. Please be patient. Daniel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Kiper
2011-Feb-03 15:31 UTC
[Xen-devel] Re: [PATCH R2 7/7] xen/balloon: Xen memory balloon driver with memory hotplug support
On Mon, Jan 03, 2011 at 02:33:22PM -0800, Dave Hansen wrote: > On Wed, 2010-12-29 at 18:07 +0100, Daniel Kiper wrote: > > +config XEN_BALLOON_MEMORY_HOTPLUG > > + bool "Xen memory balloon driver with memory hotplug support" > > + default n > > + depends on XEN_BALLOON && MEMORY_HOTPLUG > > + help > > + Xen memory balloon driver with memory hotplug support allows expanding > > + memory available for the system above limit declared at system startup. > > + It is very useful on critical systems which require long run without > > + rebooting. > > This might be better phrased as "Memory hotplug support for Xen balloon > driver". It might otherwise confuse people about whether they're seeing > some kind of choice or an _enhancement_ to the existing driver. Thx. Done. I will send a new patch release today. > Also, why bother even making this a config option? What are the > downsides if it was always compiled in? You could even make it a > non-prompting Kconfig option and just automatically turn it on with > XEN_BALLOON && MEMORY_HOTPLUG. At this stage of development I think it is better to leave this as an option for the user. Later, when this feature is mature, the option could be removed. However, I realized that this solution does not give the user a way to disable this feature at runtime. Maybe it should be a boot/sysfs option. I am not sure whether it is required or not. Currently it is not implemented. > > +static int allocate_memory_resource(struct resource **r, unsigned long nr_pages) > > { > > - unsigned long target = balloon_stats.target_pages; > > + int rc; > > + resource_size_t r_min, r_size; > > + > > + /* > > + * Look for first unused memory region starting at page > > + * boundary. Skip last memory section created at boot time > > + * becuase it may contains unused memory pages with PG_reserved > > + * bit not set (online_pages require PG_reserved bit set). 
> > + */ > > + > > + *r = kzalloc(sizeof(struct resource), GFP_KERNEL); > > > > - target = min(target, > > - balloon_stats.current_pages + > > - balloon_stats.balloon_low + > > - balloon_stats.balloon_high); > > + if (!*r) > > + return -ENOMEM; > > > > - return target; > > + (*r)->name = "System RAM"; > > + (*r)->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > > + r_min = PFN_PHYS(section_nr_to_pfn(pfn_to_section_nr(balloon_stats.boot_max_pfn) + 1)); > > + r_size = nr_pages << PAGE_SHIFT; > > + > > + rc = allocate_resource(&iomem_resource, *r, r_size, r_min, > > + ULONG_MAX, PAGE_SIZE, NULL, NULL); > > + > > + if (rc < 0) { > > + kfree(*r); > > + *r = NULL; > > + } > > + > > + return rc; > > } > > The double-pointer stuff here ends up looking a little funky. Is there > any reason you don't just use ERR_PTRs? That might look a bit more > sane. After another review I removed all the double-pointer stuff (it was very simple; however, after reading my own code many times it was very difficult to notice that). Now I think it is much better. Thanks. > > +static void adjust_memory_resource(struct resource **r, unsigned long nr_pages) > > +{ > > + if ((*r)->end + 1 - (nr_pages << PAGE_SHIFT) == (*r)->start) { > > + BUG_ON(release_resource(*r) < 0); > > In some kernels, people do: > > #define BUG_ON(...) do{}while(0) > > to save space. If anyone ever does that with this code, it'll break > horribly. It's also hard to read these. So, please break logic actions > _out_ of the BUG_ON() arguments. > > That's repeated in quite a few places in here. Make sure to go get them > all. Done. > It also isn't evident what this patch set is trying to do until you get > down to this 7/7 patch. You might want to put a more complete > description in 0/7. Will be done. :-))) Thank you for your help. Daniel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel