Stefano Stabellini
2013-Aug-04 14:38 UTC
[PATCH v4 0/2] make ballooned out pages have a valid mapping at all times
Hi all, this patch series limits problems caused by tcp retransmits on NFS when the original block pages were mapped from a foreign domain and now the mapping is gone. It accomplishes the goal by: 1) mapping all ballooned out pages to a per-cpu "balloon_scratch_page"; 2) making sure that once a grant is unmapped, the original mapping to the per-cpu balloon_scratch_page is restored atomically. The first patch accomplishes (1), the second patch uses GNTTABOP_unmap_and_replace to atomically unmap a grant and restore the original mapping. Changes in this version: - add an early_initcall to clear all the possible per_cpu balloon_scratch_page. Stefano Stabellini (2): xen/balloon: set a mapping for ballooned out pages xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping arch/x86/xen/p2m.c | 22 ++++++++++----- drivers/xen/balloon.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++-- drivers/xen/gntdev.c | 11 +------ include/xen/balloon.h | 3 ++ 4 files changed, 86 insertions(+), 19 deletions(-) git://git.kernel.org/pub/scm/linux/kernel/git/sstabellini/xen.git valid_mapping_4 Cheers, Stefano
Stefano Stabellini
2013-Aug-04 14:39 UTC
[PATCH v4 1/2] xen/balloon: set a mapping for ballooned out pages
Currently ballooned out pages are mapped to 0 and have INVALID_P2M_ENTRY in the p2m. These ballooned out pages are used to map foreign grants by gntdev and blkback (see alloc_xenballooned_pages). Allocate a page per cpu and map all the ballooned out pages to the corresponding mfn. Set the p2m accordingly. This way reading from a ballooned out page won''t cause a kernel crash (see http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html). Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Reviewed-by: David Vrabel <david.vrabel@citrix.com> CC: alex@alex.org.uk CC: dcrisan@flexiant.com --- drivers/xen/balloon.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++-- include/xen/balloon.h | 3 ++ 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 930fb68..d5ff74f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -36,6 +36,7 @@ * IN THE SOFTWARE. */ +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/errno.h> @@ -50,6 +51,7 @@ #include <linux/notifier.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/percpu-defs.h> #include <asm/page.h> #include <asm/pgalloc.h> @@ -88,6 +90,8 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; +static DEFINE_PER_CPU(struct page *, balloon_scratch_page); + #ifdef CONFIG_HIGHMEM #define inc_totalhigh_pages() (totalhigh_pages++) @@ -423,7 +427,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - __pte_ma(0), 0); + pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)), + PAGE_KERNEL_RO), 0); BUG_ON(ret); } #endif @@ -436,7 +441,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, + pfn_to_mfn(page_to_pfn(__get_cpu_var(balloon_scratch_page)))); balloon_append(pfn_to_page(pfn)); } @@ -491,6 +497,18 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); } +struct page *get_balloon_scratch_page(void) +{ + struct page *ret = get_cpu_var(balloon_scratch_page); + BUG_ON(ret == NULL); + return ret; +} + +void put_balloon_scratch_page(void) +{ + put_cpu_var(balloon_scratch_page); +} + /* Resets the Xen limit, sets new target, and kicks off processing. */ void balloon_set_new_target(unsigned long target) { @@ -584,13 +602,47 @@ static void __init balloon_add_region(unsigned long start_pfn, } } +static int __cpuinit balloon_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + switch (action) { + case CPU_UP_PREPARE: + if (per_cpu(balloon_scratch_page, cpu) != NULL) + break; + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return NOTIFY_BAD; + } + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block balloon_cpu_notifier __cpuinitdata = { + .notifier_call = balloon_cpu_notify, +}; + static int __init balloon_init(void) { - int i; + int i, cpu; if (!xen_domain()) return -ENODEV; + for_each_online_cpu(cpu) + { + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return -ENOMEM; + } + } + register_cpu_notifier(&balloon_cpu_notifier); + pr_info("xen/balloon: Initialising balloon driver.\n"); balloon_stats.current_pages = xen_pv_domain() @@ -627,4 +679,15 @@ static int __init balloon_init(void) subsys_initcall(balloon_init); +static int __init balloon_clear(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(balloon_scratch_page, cpu) = NULL; + + return 0; +} +early_initcall(balloon_clear); + MODULE_LICENSE("GPL"); diff --git a/include/xen/balloon.h b/include/xen/balloon.h index cc2e1a7..a4c1c6a 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -29,6 +29,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem); void free_xenballooned_pages(int nr_pages, struct page **pages); +struct page *get_balloon_scratch_page(void); +void put_balloon_scratch_page(void); + struct device; #ifdef CONFIG_XEN_SELFBALLOONING extern int register_xen_selfballooning(struct device *dev); -- 1.7.2.5
Stefano Stabellini
2013-Aug-04 14:39 UTC
[PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 mapping instead of reinstating the original mapping. Doing so separately would be racy. To unmap a grant and reinstate the original mapping atomically we use GNTTABOP_unmap_and_replace. GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping passed in new_addr so we have to reinstate it, however that is a per-cpu mapping only used for balloon scratch pages, so we can be sure that it''s not going to be accessed while the mapping is not valid. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Reviewed-by: David Vrabel <david.vrabel@citrix.com> CC: alex@alex.org.uk CC: dcrisan@flexiant.com --- arch/x86/xen/p2m.c | 22 +++++++++++++++------- drivers/xen/gntdev.c | 11 ++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa..0d4ec35 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -161,6 +161,7 @@ #include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <xen/balloon.h> #include <xen/grant_table.h> #include "multicalls.h" @@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; - struct gnttab_unmap_grant_ref *unmap_op; + struct gnttab_unmap_and_replace *unmap_op; + struct page *scratch_page = get_balloon_scratch_page(); + unsigned long scratch_page_address = (unsigned long) + __va(page_to_pfn(scratch_page) << PAGE_SHIFT); /* * It might be that we queued all the m2p grant table @@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page, } mcs = xen_mc_entry( - sizeof(struct gnttab_unmap_grant_ref)); + sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; unmap_op->host_addr = kmap_op->host_addr; + unmap_op->new_addr = scratch_page_address; unmap_op->handle = kmap_op->handle; - unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_grant_ref, unmap_op, 1); + GNTTABOP_unmap_and_replace, unmap_op, 1); xen_mc_issue(PARAVIRT_LAZY_MMU); - set_pte_at(&init_mm, address, ptep, - pfn_pte(pfn, PAGE_KERNEL)); - __flush_tlb_single(address); + mcs = __xen_mc_entry(0); + MULTI_update_va_mapping(mcs.mc, scratch_page_address, + pfn_pte(page_to_pfn(get_balloon_scratch_page()), + PAGE_KERNEL_RO), 0); + xen_mc_issue(PARAVIRT_LAZY_MMU); + kmap_op->host_addr = 0; + put_balloon_scratch_page(); } } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 3c8803f..51f4c95 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -270,19 +270,12 @@ static int map_grant_pages(struct grant_map *map) * with find_grant_ptes. */ for (i = 0; i < map->count; i++) { - unsigned level; unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); - pte_t *ptep; - u64 pte_maddr = 0; BUG_ON(PageHighMem(map->pages[i])); - ptep = lookup_address(address, &level); - pte_maddr = arbitrary_virt_to_machine(ptep).maddr; - gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, - map->flags | - GNTMAP_host_map | - GNTMAP_contains_pte, + gnttab_set_map_op(&map->kmap_ops[i], address, + map->flags | GNTMAP_host_map, map->grants[i].ref, map->grants[i].domid); } -- 1.7.2.5
David Vrabel
2013-Aug-05 14:53 UTC
Re: [PATCH v4 0/2] make ballooned out pages have a valid mapping at all times
On 04/08/13 15:38, Stefano Stabellini wrote:> Hi all, > this patch series limits problems caused by tcp retransmits on NFS when > the original block pages were mapped from a foreign domain and now the > mapping is gone. > > It accomplishes the goal by: > > 1) mapping all ballooned out pages to a per-cpu "balloon_scratch_page"; > 2) making sure that once a grant is unmapped, the original mapping to > the per-cpu balloon_scratch_page is restored atomically. > > The first patch accomplishes (1), the second patch uses > GNTTABOP_unmap_and_replace to atomically unmap a grant and restore the > original mapping. > > > > Changes in this version: > - add an early_initcall to clear all the possible per_cpu > balloon_scratch_page.I know Konrad asked for this but I don''t think it is necessary. Many other users of DEFINE_PER_CPU() assume the space is initialized to zero. e.g., cpufreq_show_table in drivers/cpufreq/freq_table.c If this is the only change in v4 I would suggest taking the v3 patches instead. David
Konrad Rzeszutek Wilk
2013-Aug-09 15:26 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote:> GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > mapping instead of reinstating the original mapping. > Doing so separately would be racy. > > To unmap a grant and reinstate the original mapping atomically we use > GNTTABOP_unmap_and_replace. > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > passed in new_addr so we have to reinstate it, however that is a > per-cpu mapping only used for balloon scratch pages, so we can be sure that > it''s not going to be accessed while the mapping is not valid.This looks to be depend on a new structure, which is not in Linux kernel? Are you missing a dependency patch? Shouldn''t we use some logic to figure out which hypercall to use if the hypervisor does not support it?> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > Reviewed-by: David Vrabel <david.vrabel@citrix.com> > CC: alex@alex.org.uk > CC: dcrisan@flexiant.com > --- > arch/x86/xen/p2m.c | 22 +++++++++++++++------- > drivers/xen/gntdev.c | 11 ++--------- > 2 files changed, 17 insertions(+), 16 deletions(-) > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > index 95fb2aa..0d4ec35 100644 > --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -161,6 +161,7 @@ > #include <asm/xen/page.h> > #include <asm/xen/hypercall.h> > #include <asm/xen/hypervisor.h> > +#include <xen/balloon.h> > #include <xen/grant_table.h> > > #include "multicalls.h" > @@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, > if (kmap_op != NULL) { > if (!PageHighMem(page)) { > struct multicall_space mcs; > - struct gnttab_unmap_grant_ref *unmap_op; > + struct gnttab_unmap_and_replace *unmap_op; > + struct page *scratch_page = get_balloon_scratch_page(); > + unsigned long scratch_page_address = (unsigned long) > + __va(page_to_pfn(scratch_page) << PAGE_SHIFT); > > /* > * It might be that we queued all the m2p grant table > @@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page, > } > > mcs = xen_mc_entry( > - sizeof(struct gnttab_unmap_grant_ref)); > + sizeof(struct gnttab_unmap_and_replace)); > unmap_op = mcs.args; > unmap_op->host_addr = kmap_op->host_addr; > + unmap_op->new_addr = scratch_page_address; > unmap_op->handle = kmap_op->handle; > - unmap_op->dev_bus_addr = 0; > > MULTI_grant_table_op(mcs.mc, > - GNTTABOP_unmap_grant_ref, unmap_op, 1); > + GNTTABOP_unmap_and_replace, unmap_op, 1); > > xen_mc_issue(PARAVIRT_LAZY_MMU); > > - set_pte_at(&init_mm, address, ptep, > - pfn_pte(pfn, PAGE_KERNEL)); > - __flush_tlb_single(address); > + mcs = __xen_mc_entry(0); > + MULTI_update_va_mapping(mcs.mc, scratch_page_address, > + pfn_pte(page_to_pfn(get_balloon_scratch_page()), > + PAGE_KERNEL_RO), 0); > + xen_mc_issue(PARAVIRT_LAZY_MMU); > + > kmap_op->host_addr = 0; > + put_balloon_scratch_page(); > } > } > > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index 3c8803f..51f4c95 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -270,19 +270,12 @@ static int map_grant_pages(struct grant_map *map) > * with find_grant_ptes. > */ > for (i = 0; i < map->count; i++) { > - unsigned level; > unsigned long address = (unsigned long) > pfn_to_kaddr(page_to_pfn(map->pages[i])); > - pte_t *ptep; > - u64 pte_maddr = 0; > BUG_ON(PageHighMem(map->pages[i])); > > - ptep = lookup_address(address, &level); > - pte_maddr = arbitrary_virt_to_machine(ptep).maddr; > - gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, > - map->flags | > - GNTMAP_host_map | > - GNTMAP_contains_pte, > + gnttab_set_map_op(&map->kmap_ops[i], address, > + map->flags | GNTMAP_host_map, > map->grants[i].ref, > map->grants[i].domid); > } > -- > 1.7.2.5 >
Stefano Stabellini
2013-Aug-13 11:17 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Fri, 9 Aug 2013, Konrad Rzeszutek Wilk wrote:> On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote: > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > mapping instead of reinstating the original mapping. > > Doing so separately would be racy. > > > > To unmap a grant and reinstate the original mapping atomically we use > > GNTTABOP_unmap_and_replace. > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > > passed in new_addr so we have to reinstate it, however that is a > > per-cpu mapping only used for balloon scratch pages, so we can be sure that > > it''s not going to be accessed while the mapping is not valid. > > This looks to be depend on a new structure, which is not in Linux kernel? > Are you missing a dependency patch?Nope, GNTTABOP_unmap_and_replace and struct gnttab_unmap_and_replace are already present in include/xen/interface/grant_table.h.> Shouldn''t we use some logic to figure out which hypercall to use if the > hypervisor does not support it?GNTTABOP_unmap_and_replace is not a new hypercall, it has been supported by Xen for a very long time. In a previous iteration of this patch series, I did introduce a new hypercall, but then I dropped it because I figured out that I could achieve the same thing with the existing hypercall.
Konrad Rzeszutek Wilk
2013-Aug-13 13:11 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Tue, Aug 13, 2013 at 12:17:18PM +0100, Stefano Stabellini wrote:> On Fri, 9 Aug 2013, Konrad Rzeszutek Wilk wrote: > > On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote: > > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > > mapping instead of reinstating the original mapping. > > > Doing so separately would be racy. > > > > > > To unmap a grant and reinstate the original mapping atomically we use > > > GNTTABOP_unmap_and_replace. > > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > > > passed in new_addr so we have to reinstate it, however that is a > > > per-cpu mapping only used for balloon scratch pages, so we can be sure that > > > it''s not going to be accessed while the mapping is not valid. > > > > This looks to be depend on a new structure, which is not in Linux kernel? > > Are you missing a dependency patch? > > Nope, GNTTABOP_unmap_and_replace and struct gnttab_unmap_and_replace are > already present in include/xen/interface/grant_table.h. > > > > Shouldn''t we use some logic to figure out which hypercall to use if the > > hypervisor does not support it? > > GNTTABOP_unmap_and_replace is not a new hypercall, it has been supported > by Xen for a very long time. > > In a previous iteration of this patch series, I did introduce a new > hypercall, but then I dropped it because I figured out that I could > achieve the same thing with the existing hypercall.OK, Please tack on: Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> P.S. If you could stick it on devel/for-linus-3.12 that would be super. Thanks!
Stefano Stabellini
2013-Aug-13 15:38 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Tue, 13 Aug 2013, Konrad Rzeszutek Wilk wrote:> On Tue, Aug 13, 2013 at 12:17:18PM +0100, Stefano Stabellini wrote: > > On Fri, 9 Aug 2013, Konrad Rzeszutek Wilk wrote: > > > On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote: > > > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > > > mapping instead of reinstating the original mapping. > > > > Doing so separately would be racy. > > > > > > > > To unmap a grant and reinstate the original mapping atomically we use > > > > GNTTABOP_unmap_and_replace. > > > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > > > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > > > > passed in new_addr so we have to reinstate it, however that is a > > > > per-cpu mapping only used for balloon scratch pages, so we can be sure that > > > > it''s not going to be accessed while the mapping is not valid. > > > > > > This looks to be depend on a new structure, which is not in Linux kernel? > > > Are you missing a dependency patch? > > > > Nope, GNTTABOP_unmap_and_replace and struct gnttab_unmap_and_replace are > > already present in include/xen/interface/grant_table.h. > > > > > > > Shouldn''t we use some logic to figure out which hypercall to use if the > > > hypervisor does not support it? > > > > GNTTABOP_unmap_and_replace is not a new hypercall, it has been supported > > by Xen for a very long time. > > > > In a previous iteration of this patch series, I did introduce a new > > hypercall, but then I dropped it because I figured out that I could > > achieve the same thing with the existing hypercall. > > OK, Please tack on: > > Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > P.S. > If you could stick it on devel/for-linus-3.12 that would be super. Thanks!done!