Stefano Stabellini
2013-Jul-21 17:32 UTC
[PATCH 0/3] make ballooned out pages have a valid mapping at all times
Hi all, this patch series limits problems caused by tcp retransmits on NFS when the original block pages were mapped from a foreign domain and now the mapping is gone. It accomplishes the goal by: 1) mapping all ballooned out pages to a "balloon_trade_page"; 2) making sure that once a grant is unmapped, the original mapping to balloon_trade_page is restored atomically. The first patch accomplishes (1), the second patch is necessary to introduce a grant_table operation that we can use to do (2). Finally the last patch uses GNTTABOP_unmap_and_replace to atomically unmap a grant and restore the original mapping. Stefano Stabellini (3): xen/balloon: set a mapping for ballooned out pages xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall xen/m2p: use GNTTABOP_unmap_and_replace to replace foreign grants with balloon_trade_page arch/x86/xen/p2m.c | 25 ++++++++++++++++++------- drivers/xen/balloon.c | 10 ++++++++-- drivers/xen/gntdev.c | 11 ++--------- drivers/xen/grant-table.c | 8 ++++++++ include/xen/balloon.h | 1 + include/xen/grant_table.h | 1 + include/xen/interface/grant_table.h | 8 +++++--- 7 files changed, 43 insertions(+), 21 deletions(-) Cheers, Stefano
Stefano Stabellini
2013-Jul-21 17:32 UTC
[PATCH 1/3] xen/balloon: set a mapping for ballooned out pages
Currently ballooned out pages are mapped to 0 and have INVALID_P2M_ENTRY in the p2m. These ballooned out pages are used to map foreign grants by gntdev and blkback (see alloc_xenballooned_pages). Allocate a page and map all the ballooned out pages to the corresponding mfn. Set the p2m accordingly. This way reading from a ballooned out page won''t cause a kernel crash (see http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html). Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> CC: alex@alex.org.uk CC: dcrisan@flexiant.com --- drivers/xen/balloon.c | 10 ++++++++-- include/xen/balloon.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 930fb68..91bd599 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -88,6 +88,8 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; +struct page* balloon_trade_page; + #ifdef CONFIG_HIGHMEM #define inc_totalhigh_pages() (totalhigh_pages++) @@ -423,7 +425,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - __pte_ma(0), 0); + pfn_pte(page_to_pfn(balloon_trade_page), PAGE_KERNEL_RO), 0); BUG_ON(ret); } #endif @@ -436,7 +438,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, pfn_to_mfn(page_to_pfn(balloon_trade_page))); balloon_append(pfn_to_page(pfn)); } @@ -591,6 +593,10 @@ static int __init balloon_init(void) if (!xen_domain()) return -ENODEV; + balloon_trade_page = alloc_page(GFP_KERNEL); + if (balloon_trade_page == NULL) + return -ENOMEM; + pr_info("xen/balloon: Initialising balloon driver.\n"); balloon_stats.current_pages = xen_pv_domain() diff --git a/include/xen/balloon.h b/include/xen/balloon.h index cc2e1a7..5c01a15 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -22,6 +22,7 @@ struct balloon_stats { }; extern struct balloon_stats balloon_stats; +extern struct page *balloon_trade_page; void balloon_set_new_target(unsigned long target); -- 1.7.2.5
Stefano Stabellini
2013-Jul-21 17:32 UTC
[PATCH 2/3] xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall
The current GNTTABOP_unmap_and_replace (7) is deprecated, rename it to GNTTABOP_unmap_and_replace_legacy. Introduce the new GNTTABOP_unmap_and_replace (12) and detect at boot time which one is available. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> CC: alex@alex.org.uk CC: dcrisan@flexiant.com --- drivers/xen/grant-table.c | 8 ++++++++ include/xen/grant_table.h | 1 + include/xen/interface/grant_table.h | 8 +++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 04c1b2d..0f7d00b 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -65,6 +65,8 @@ static grant_ref_t gnttab_free_head; static DEFINE_SPINLOCK(gnttab_list_lock); unsigned long xen_hvm_resume_frames; EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); +int __read_mostly gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace; +EXPORT_SYMBOL_GPL(gnttabop_unmap_and_replace); static union { struct grant_entry_v1 *v1; @@ -1238,6 +1240,12 @@ int gnttab_init(void) gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, NULL, 1); + if (ret == -ENOSYS) { + gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace_legacy; + pr_warn("GNTTABOP_unmap_and_replace unavailable, falling back to the racy legacy version\n"); + } + printk("Grant table initialized\n"); return 0; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 694dcaf..867d1a7 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -179,6 +179,7 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); extern unsigned long xen_hvm_resume_frames; +extern int gnttabop_unmap_and_replace; unsigned int gnttab_max_grant_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index e40fae9..2a2b74f 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -408,15 +408,17 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); /* * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings * tracked by <handle> but atomically replace the page table entry with one - * pointing to the machine address under <new_addr>. <new_addr> will be - * redirected to the null entry. + * pointing to the machine address under <new_addr>. + * GNTTABOP_unmap_and_replace_legacy also redirects <new_addr> to the + * null entry * NOTES: * 1. The call may fail in an undefined manner if either mapping is not * tracked by <handle>. * 2. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ -#define GNTTABOP_unmap_and_replace 7 +#define GNTTABOP_unmap_and_replace_legacy 7 +#define GNTTABOP_unmap_and_replace 12 struct gnttab_unmap_and_replace { /* IN parameters. */ uint64_t host_addr; -- 1.7.2.5
Stefano Stabellini
2013-Jul-21 17:32 UTC
[PATCH 3/3] xen/m2p: use GNTTABOP_unmap_and_replace to replace foreign grants with balloon_trade_page
GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 mapping instead of reinstating the original mapping. Doing so separately would be racy. To unmap a grant and reinstate the original mapping atomically we use GNTTABOP_unmap_and_replace. GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so don''t use it for kmaps. Note: the old GNTTABOP_unmap_and_replace_legacy zeros the mapping passed as "new_addr". This can cause race conditions when another cpu tries to use the mapping before it has been restored. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> CC: alex@alex.org.uk CC: dcrisan@flexiant.com --- arch/x86/xen/p2m.c | 25 ++++++++++++++++++------- drivers/xen/gntdev.c | 11 ++--------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa..20d7056 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -161,6 +161,7 @@ #include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <xen/balloon.h> #include <xen/grant_table.h> #include "multicalls.h" @@ -967,7 +968,9 @@ int m2p_remove_override(struct page *page, if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; - struct gnttab_unmap_grant_ref *unmap_op; + struct gnttab_unmap_and_replace *unmap_op; + unsigned long trade_page_address = (unsigned long) + __va(page_to_pfn(balloon_trade_page) << PAGE_SHIFT); /* * It might be that we queued all the m2p grant table @@ -990,20 +993,28 @@ int m2p_remove_override(struct page *page, } mcs = xen_mc_entry( - sizeof(struct gnttab_unmap_grant_ref)); + sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; unmap_op->host_addr = kmap_op->host_addr; + unmap_op->new_addr = trade_page_address; unmap_op->handle = kmap_op->handle; - unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_grant_ref, unmap_op, 1); + gnttabop_unmap_and_replace, unmap_op, 1); xen_mc_issue(PARAVIRT_LAZY_MMU); - set_pte_at(&init_mm, address, ptep, - pfn_pte(pfn, PAGE_KERNEL)); - __flush_tlb_single(address); + /* this version of the hypercall is racy, let''s try to limit + * the damage */ + if (unlikely(gnttabop_unmap_and_replace =+ GNTTABOP_unmap_and_replace_legacy)) { + mcs = __xen_mc_entry(0); + + MULTI_update_va_mapping(mcs.mc, trade_page_address, + pfn_pte(page_to_pfn(balloon_trade_page), PAGE_KERNEL_RO), + 0); + xen_mc_issue(PARAVIRT_LAZY_MMU); + } kmap_op->host_addr = 0; } } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 3c8803f..51f4c95 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -270,19 +270,12 @@ static int map_grant_pages(struct grant_map *map) * with find_grant_ptes. */ for (i = 0; i < map->count; i++) { - unsigned level; unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); - pte_t *ptep; - u64 pte_maddr = 0; BUG_ON(PageHighMem(map->pages[i])); - ptep = lookup_address(address, &level); - pte_maddr = arbitrary_virt_to_machine(ptep).maddr; - gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, - map->flags | - GNTMAP_host_map | - GNTMAP_contains_pte, + gnttab_set_map_op(&map->kmap_ops[i], address, + map->flags | GNTMAP_host_map, map->grants[i].ref, map->grants[i].domid); } -- 1.7.2.5
Ian Campbell
2013-Jul-21 17:44 UTC
Re: [PATCH 2/3] xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall
On Sun, 2013-07-21 at 18:32 +0100, Stefano Stabellini wrote:> The current GNTTABOP_unmap_and_replace (7) is deprecated, rename > it to GNTTABOP_unmap_and_replace_legacy.We would normally call this foo_compat.> Introduce the new > GNTTABOP_unmap_and_replace (12) and detect at boot time which one is > available.I don''t think this exists yet, is there a h/v patch under separate cover?> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > CC: alex@alex.org.uk > CC: dcrisan@flexiant.com > --- > drivers/xen/grant-table.c | 8 ++++++++ > include/xen/grant_table.h | 1 + > include/xen/interface/grant_table.h | 8 +++++--- > 3 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c > index 04c1b2d..0f7d00b 100644 > --- a/drivers/xen/grant-table.c > +++ b/drivers/xen/grant-table.c > @@ -65,6 +65,8 @@ static grant_ref_t gnttab_free_head; > static DEFINE_SPINLOCK(gnttab_list_lock); > unsigned long xen_hvm_resume_frames; > EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); > +int __read_mostly gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace; > +EXPORT_SYMBOL_GPL(gnttabop_unmap_and_replace);I can see this getting set but not used, does it really need to be exported?> > static union { > struct grant_entry_v1 *v1; > @@ -1238,6 +1240,12 @@ int gnttab_init(void) > gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; > gnttab_free_head = NR_RESERVED_ENTRIES; > > + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, NULL, 1); > + if (ret == -ENOSYS) { > + gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace_legacy; > + pr_warn("GNTTABOP_unmap_and_replace unavailable, falling back to the racy legacy version\n");Perhaps WARN_ONCE?> + } > + > printk("Grant table initialized\n"); > return 0; > > diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h > index 694dcaf..867d1a7 100644 > --- a/include/xen/grant_table.h > +++ b/include/xen/grant_table.h > @@ -179,6 +179,7 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, > void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); > > extern unsigned long xen_hvm_resume_frames; > +extern int gnttabop_unmap_and_replace; > unsigned int gnttab_max_grant_frames(void); > > #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) > diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h > index e40fae9..2a2b74f 100644 > --- a/include/xen/interface/grant_table.h > +++ b/include/xen/interface/grant_table.h > @@ -408,15 +408,17 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); > /* > * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings > * tracked by <handle> but atomically replace the page table entry with one > - * pointing to the machine address under <new_addr>. <new_addr> will be > - * redirected to the null entry. > + * pointing to the machine address under <new_addr>. > + * GNTTABOP_unmap_and_replace_legacy also redirects <new_addr> to the > + * null entryI wonder if rather than _legacy (or _compat) if the old behaviour might be well described as "unmap_and_swap", since I think that is what clearing new_addr means?> > * NOTES: > * 1. The call may fail in an undefined manner if either mapping is not > * tracked by <handle>. > * 2. After executing a batch of unmaps, it is guaranteed that no stale > * mappings will remain in the device or host TLBs. > */ > -#define GNTTABOP_unmap_and_replace 7 > +#define GNTTABOP_unmap_and_replace_legacy 7 > +#define GNTTABOP_unmap_and_replace 12 > struct gnttab_unmap_and_replace { > /* IN parameters. */ > uint64_t host_addr;
Ian Campbell
2013-Jul-21 17:49 UTC
Re: [PATCH 3/3] xen/m2p: use GNTTABOP_unmap_and_replace to replace foreign grants with balloon_trade_page
On Sun, 2013-07-21 at 18:32 +0100, Stefano Stabellini wrote:> GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > mapping instead of reinstating the original mapping. > Doing so separately would be racy. > To unmap a grant and reinstate the original mapping atomically we use > GNTTABOP_unmap_and_replace. > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > don''t use it for kmaps. > > Note: the old GNTTABOP_unmap_and_replace_legacy zeros the mapping passed > as "new_addr". This can cause race conditions when another cpu tries to > use the mapping before it has been restored. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > CC: alex@alex.org.uk > CC: dcrisan@flexiant.com > --- > arch/x86/xen/p2m.c | 25 ++++++++++++++++++------- > drivers/xen/gntdev.c | 11 ++--------- > 2 files changed, 20 insertions(+), 16 deletions(-) > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > index 95fb2aa..20d7056 100644 > --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -161,6 +161,7 @@ > #include <asm/xen/page.h> > #include <asm/xen/hypercall.h> > #include <asm/xen/hypervisor.h> > +#include <xen/balloon.h> > #include <xen/grant_table.h> > > #include "multicalls.h" > @@ -967,7 +968,9 @@ int m2p_remove_override(struct page *page, > if (kmap_op != NULL) { > if (!PageHighMem(page)) { > struct multicall_space mcs; > - struct gnttab_unmap_grant_ref *unmap_op; > + struct gnttab_unmap_and_replace *unmap_op; > + unsigned long trade_page_address = (unsigned long) > + __va(page_to_pfn(balloon_trade_page) << PAGE_SHIFT);Rather than exporting this from the balloon driver could the gnttab code not remember what the original mapping was and simply replace it? In pages which came from the balloon driver this would effectively aways be the trade page but it would work for pages from other sources too.> /* > * It might be that we queued all the m2p grant table > @@ -990,20 +993,28 @@ int m2p_remove_override(struct page *page, > } > > mcs = xen_mc_entry( > - sizeof(struct gnttab_unmap_grant_ref)); > + sizeof(struct gnttab_unmap_and_replace)); > unmap_op = mcs.args; > unmap_op->host_addr = kmap_op->host_addr; > + unmap_op->new_addr = trade_page_address;If you are using the old replace operation won''t this nuke the existing mapping of the trade page and knacker any future uses? i.e. it breaks the fallback case?> unmap_op->handle = kmap_op->handle; > - unmap_op->dev_bus_addr = 0; > > MULTI_grant_table_op(mcs.mc, > - GNTTABOP_unmap_grant_ref, unmap_op, 1); > + gnttabop_unmap_and_replace, unmap_op, 1); > > xen_mc_issue(PARAVIRT_LAZY_MMU); > > - set_pte_at(&init_mm, address, ptep, > - pfn_pte(pfn, PAGE_KERNEL)); > - __flush_tlb_single(address); > + /* this version of the hypercall is racy, let''s try to limit > + * the damage */ > + if (unlikely(gnttabop_unmap_and_replace => + GNTTABOP_unmap_and_replace_legacy)) {Does this race go away if you allocate per-cpu virtual addresses which all alias the same pfn?> + mcs = __xen_mc_entry(0); > + > + MULTI_update_va_mapping(mcs.mc, trade_page_address, > + pfn_pte(page_to_pfn(balloon_trade_page), PAGE_KERNEL_RO), > + 0); > + xen_mc_issue(PARAVIRT_LAZY_MMU); > + } > kmap_op->host_addr = 0; > } > }
Stefano Stabellini
2013-Jul-22 10:32 UTC
Re: [PATCH 3/3] xen/m2p: use GNTTABOP_unmap_and_replace to replace foreign grants with balloon_trade_page
On Sun, 21 Jul 2013, Ian Campbell wrote:> On Sun, 2013-07-21 at 18:32 +0100, Stefano Stabellini wrote: > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > mapping instead of reinstating the original mapping. > > Doing so separately would be racy. > > To unmap a grant and reinstate the original mapping atomically we use > > GNTTABOP_unmap_and_replace. > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > don''t use it for kmaps. > > > > Note: the old GNTTABOP_unmap_and_replace_legacy zeros the mapping passed > > as "new_addr". This can cause race conditions when another cpu tries to > > use the mapping before it has been restored. > > > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > > CC: alex@alex.org.uk > > CC: dcrisan@flexiant.com > > --- > > arch/x86/xen/p2m.c | 25 ++++++++++++++++++------- > > drivers/xen/gntdev.c | 11 ++--------- > > 2 files changed, 20 insertions(+), 16 deletions(-) > > > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > > index 95fb2aa..20d7056 100644 > > --- a/arch/x86/xen/p2m.c > > +++ b/arch/x86/xen/p2m.c > > @@ -161,6 +161,7 @@ > > #include <asm/xen/page.h> > > #include <asm/xen/hypercall.h> > > #include <asm/xen/hypervisor.h> > > +#include <xen/balloon.h> > > #include <xen/grant_table.h> > > > > #include "multicalls.h" > > @@ -967,7 +968,9 @@ int m2p_remove_override(struct page *page, > > if (kmap_op != NULL) { > > if (!PageHighMem(page)) { > > struct multicall_space mcs; > > - struct gnttab_unmap_grant_ref *unmap_op; > > + struct gnttab_unmap_and_replace *unmap_op; > > + unsigned long trade_page_address = (unsigned long) > > + __va(page_to_pfn(balloon_trade_page) << PAGE_SHIFT); > > Rather than exporting this from the balloon driver could the gnttab code > not remember what the original mapping was and simply replace it? In > pages which came from the balloon driver this would effectively aways be > the trade page but it would work for pages from other sources too.The m2p code already remembers exactly what the original mapping was. In fact the original mfn is stored in page->index. However that is the mfn and gnttab_unmap_and_replace takes a virtual address. It could take a pte pointer but that functionality is not implemented. If I use __va on the page I get the current mapping (the grant). To avoid confusion I exported balloon_trade_page instead.> > /* > > * It might be that we queued all the m2p grant table > > @@ -990,20 +993,28 @@ int m2p_remove_override(struct page *page, > > } > > > > mcs = xen_mc_entry( > > - sizeof(struct gnttab_unmap_grant_ref)); > > + sizeof(struct gnttab_unmap_and_replace)); > > unmap_op = mcs.args; > > unmap_op->host_addr = kmap_op->host_addr; > > + unmap_op->new_addr = trade_page_address; > > If you are using the old replace operation won''t this nuke the existing > mapping of the trade page and knacker any future uses? i.e. it breaks > the fallback case?Yes> > unmap_op->handle = kmap_op->handle; > > - unmap_op->dev_bus_addr = 0; > > > > MULTI_grant_table_op(mcs.mc, > > - GNTTABOP_unmap_grant_ref, unmap_op, 1); > > + gnttabop_unmap_and_replace, unmap_op, 1); > > > > xen_mc_issue(PARAVIRT_LAZY_MMU); > > > > - set_pte_at(&init_mm, address, ptep, > > - pfn_pte(pfn, PAGE_KERNEL)); > > - __flush_tlb_single(address); > > + /* this version of the hypercall is racy, let''s try to limit > > + * the damage */ > > + if (unlikely(gnttabop_unmap_and_replace => > + GNTTABOP_unmap_and_replace_legacy)) { > > Does this race go away if you allocate per-cpu virtual addresses which > all alias the same pfn?Yes. I admit that I might have been blinded by my hatred against GNTTABOP_unmap_and_replace: I thought that an interface that broken would need to be fixed and backported. Also I didn''t want to allocate nr_cpu pages of virtual addresses and handle cpu-hotplug notifications. If we go through all the trouble of doing that, should we even bother introducing the new GNTTABOP_unmap_and_replace?
Stefano Stabellini
2013-Jul-22 10:38 UTC
Re: [PATCH 2/3] xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall
On Sun, 21 Jul 2013, Ian Campbell wrote:> > diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c > > index 04c1b2d..0f7d00b 100644 > > --- a/drivers/xen/grant-table.c > > +++ b/drivers/xen/grant-table.c > > @@ -65,6 +65,8 @@ static grant_ref_t gnttab_free_head; > > static DEFINE_SPINLOCK(gnttab_list_lock); > > unsigned long xen_hvm_resume_frames; > > EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); > > +int __read_mostly gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace; > > +EXPORT_SYMBOL_GPL(gnttabop_unmap_and_replace); > > I can see this getting set but not used, does it really need to be > exported?I use it in the next patch> > > > static union { > > struct grant_entry_v1 *v1; > > @@ -1238,6 +1240,12 @@ int gnttab_init(void) > > gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; > > gnttab_free_head = NR_RESERVED_ENTRIES; > > > > + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, NULL, 1); > > + if (ret == -ENOSYS) { > > + gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace_legacy; > > + pr_warn("GNTTABOP_unmap_and_replace unavailable, falling back to the racy legacy version\n"); > > Perhaps WARN_ONCE?It''s already going through that function just once: the function is gnttab_init> > + } > > + > > printk("Grant table initialized\n"); > > return 0; > > > > diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h > > index 694dcaf..867d1a7 100644 > > --- a/include/xen/grant_table.h > > +++ b/include/xen/grant_table.h > > @@ -179,6 +179,7 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, > > void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); > > > > extern unsigned long xen_hvm_resume_frames; > > +extern int gnttabop_unmap_and_replace; > > unsigned int gnttab_max_grant_frames(void); > > > > #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) > > diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h > > index e40fae9..2a2b74f 100644 > > --- a/include/xen/interface/grant_table.h > > +++ b/include/xen/interface/grant_table.h > > @@ -408,15 +408,17 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); > > /* > > * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings > > * tracked by <handle> but atomically replace the page table entry with one > > - * pointing to the machine address under <new_addr>. <new_addr> will be > > - * redirected to the null entry. > > + * pointing to the machine address under <new_addr>. > > + * GNTTABOP_unmap_and_replace_legacy also redirects <new_addr> to the > > + * null entry > > I wonder if rather than _legacy (or _compat) if the old behaviour might > be well described as "unmap_and_swap", since I think that is what > clearing new_addr means?Maybe GNTTABOP_unmap_and_swap is a better name. However it''s still pretty far from what we need. Should I work-around the limitations of the current hypercall or implement a new one? I don''t think that doing both is a great strategy in this case.
David Vrabel
2013-Jul-22 11:40 UTC
Re: [PATCH 0/3] make ballooned out pages have a valid mapping at all times
On 21/07/13 18:32, Stefano Stabellini wrote:> Hi all, > this patch series limits problems caused by tcp retransmits on NFS when > the original block pages were mapped from a foreign domain and now the > mapping is gone. > > It accomplishes the goal by: > > 1) mapping all ballooned out pages to a "balloon_trade_page"; > 2) making sure that once a grant is unmapped, the original mapping to > balloon_trade_page is restored atomically.I think this can be fixed without any hypervisor-side changes, although hypervisor changes will allow you to do it more efficiently. Use a per-CPU set of trade pages. Note MFN of this CPU''s trade page (trade_mfn). Do the grant_unmap_and_replace(), (trade page mapping''s MFN is cleared but this is ok as nothing is accessing the page via this mapping). update_va_mapping on trade page VA to set its MFN to trade_mfn. David
Stefano Stabellini
2013-Jul-22 15:35 UTC
Re: [PATCH 0/3] make ballooned out pages have a valid mapping at all times
On Mon, 22 Jul 2013, David Vrabel wrote:> On 21/07/13 18:32, Stefano Stabellini wrote: > > Hi all, > > this patch series limits problems caused by tcp retransmits on NFS when > > the original block pages were mapped from a foreign domain and now the > > mapping is gone. > > > > It accomplishes the goal by: > > > > 1) mapping all ballooned out pages to a "balloon_trade_page"; > > 2) making sure that once a grant is unmapped, the original mapping to > > balloon_trade_page is restored atomically. > > I think this can be fixed without any hypervisor-side changes, although > hypervisor changes will allow you to do it more efficiently. > > Use a per-CPU set of trade pages. > > Note MFN of this CPU''s trade page (trade_mfn). > Do the grant_unmap_and_replace(), (trade page mapping''s MFN is cleared > but this is ok as nothing is accessing the page via this mapping). > update_va_mapping on trade page VA to set its MFN to trade_mfn.I am going to go for this, avoiding any Xen side changes.
Ian Campbell
2013-Jul-22 15:50 UTC
Re: [PATCH 2/3] xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall
On Mon, 2013-07-22 at 11:38 +0100, Stefano Stabellini wrote:> > > > > > static union { > > > struct grant_entry_v1 *v1; > > > @@ -1238,6 +1240,12 @@ int gnttab_init(void) > > > gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; > > > gnttab_free_head = NR_RESERVED_ENTRIES; > > > > > > + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, NULL, 1); > > > + if (ret == -ENOSYS) { > > > + gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace_legacy; > > > + pr_warn("GNTTABOP_unmap_and_replace unavailable, falling back to the racy legacy version\n"); > > > > Perhaps WARN_ONCE? > > It''s already going through that function just once: the function is > gnttab_initOh, so it is!> > > > > + } > > > + > > > printk("Grant table initialized\n"); > > > return 0; > > > > > > diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h > > > index 694dcaf..867d1a7 100644 > > > --- a/include/xen/grant_table.h > > > +++ b/include/xen/grant_table.h > > > @@ -179,6 +179,7 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, > > > void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); > > > > > > extern unsigned long xen_hvm_resume_frames; > > > +extern int gnttabop_unmap_and_replace; > > > unsigned int gnttab_max_grant_frames(void); > > > > > > #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) > > > diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h > > > index e40fae9..2a2b74f 100644 > > > --- a/include/xen/interface/grant_table.h > > > +++ b/include/xen/interface/grant_table.h > > > @@ -408,15 +408,17 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); > > > /* > > > * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings > > > * tracked by <handle> but atomically replace the page table entry with one > > > - * pointing to the machine address under <new_addr>. <new_addr> will be > > > - * redirected to the null entry. > > > + * pointing to the machine address under <new_addr>. > > > + * GNTTABOP_unmap_and_replace_legacy also redirects <new_addr> to the > > > + * null entry > > > > I wonder if rather than _legacy (or _compat) if the old behaviour might > > be well described as "unmap_and_swap", since I think that is what > > clearing new_addr means? > > Maybe GNTTABOP_unmap_and_swap is a better name. However it''s still > pretty far from what we need. Should I work-around the limitations of the > current hypercall or implement a new one? > I don''t think that doing both is a great strategy in this case.Not sure what you mean by both, whatever you do the old hypercall has to keep working, it''s used by classic-Xen netback. BTW, the existing netback user is a fallback path which does: * allocate a new page * copy content of the granted page into it * unmap and replace to swap out the new page for the grant Not sure if that makes the reason for zeroing new_addr make more sense ;-) Ian
Ian Campbell
2013-Jul-22 15:51 UTC
Re: [PATCH 0/3] make ballooned out pages have a valid mapping at all times
On Mon, 2013-07-22 at 16:35 +0100, Stefano Stabellini wrote:> On Mon, 22 Jul 2013, David Vrabel wrote: > > On 21/07/13 18:32, Stefano Stabellini wrote: > > > Hi all, > > > this patch series limits problems caused by tcp retransmits on NFS when > > > the original block pages were mapped from a foreign domain and now the > > > mapping is gone. > > > > > > It accomplishes the goal by: > > > > > > 1) mapping all ballooned out pages to a "balloon_trade_page"; > > > 2) making sure that once a grant is unmapped, the original mapping to > > > balloon_trade_page is restored atomically. > > > > I think this can be fixed without any hypervisor-side changes, although > > hypervisor changes will allow you to do it more efficiently. > > > > Use a per-CPU set of trade pages. > > > > Note MFN of this CPU''s trade page (trade_mfn). > > Do the grant_unmap_and_replace(), (trade page mapping''s MFN is cleared > > but this is ok as nothing is accessing the page via this mapping). > > update_va_mapping on trade page VA to set its MFN to trade_mfn. > > I am going to go for this, avoiding any Xen side changes.Oops, I won''t bother responding to the rest of your replies to my comments on the patches then. You can safely ignore the couple I just sent. Ian.
Konrad Rzeszutek Wilk
2013-Jul-23 14:08 UTC
Re: [PATCH 2/3] xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall
On Sun, Jul 21, 2013 at 06:32:30PM +0100, Stefano Stabellini wrote:> The current GNTTABOP_unmap_and_replace (7) is deprecated, rename > it to GNTTABOP_unmap_and_replace_legacy. Introduce the new > GNTTABOP_unmap_and_replace (12) and detect at boot time which one is > available. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > CC: alex@alex.org.uk > CC: dcrisan@flexiant.com > --- > drivers/xen/grant-table.c | 8 ++++++++ > include/xen/grant_table.h | 1 + > include/xen/interface/grant_table.h | 8 +++++--- > 3 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c > index 04c1b2d..0f7d00b 100644 > --- a/drivers/xen/grant-table.c > +++ b/drivers/xen/grant-table.c > @@ -65,6 +65,8 @@ static grant_ref_t gnttab_free_head; > static DEFINE_SPINLOCK(gnttab_list_lock); > unsigned long xen_hvm_resume_frames; > EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); > +int __read_mostly gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace; > +EXPORT_SYMBOL_GPL(gnttabop_unmap_and_replace); > > static union { > struct grant_entry_v1 *v1; > @@ -1238,6 +1240,12 @@ int gnttab_init(void) > gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; > gnttab_free_head = NR_RESERVED_ENTRIES; > > + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, NULL, 1); > + if (ret == -ENOSYS) { > + gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace_legacy; > + pr_warn("GNTTABOP_unmap_and_replace unavailable, falling back to the racy legacy version\n");This would show up all the time on Xen 4.1 and Xen 3.6 - so a variety of Cloud providers would get this. Do we get any in the field diagnostic help with this chatty message?> + } > + > printk("Grant table initialized\n"); > return 0; > > diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h > index 694dcaf..867d1a7 100644 > --- a/include/xen/grant_table.h > +++ b/include/xen/grant_table.h > @@ -179,6 +179,7 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, > void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); > > extern unsigned long xen_hvm_resume_frames; > +extern int gnttabop_unmap_and_replace; > unsigned int gnttab_max_grant_frames(void); > > #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) > diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h > index e40fae9..2a2b74f 100644 > --- a/include/xen/interface/grant_table.h > +++ b/include/xen/interface/grant_table.h > @@ -408,15 +408,17 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); > /* > * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings > * tracked by <handle> but atomically replace the page table entry with one > - * pointing to the machine address under <new_addr>. <new_addr> will be > - * redirected to the null entry. > + * pointing to the machine address under <new_addr>. > + * GNTTABOP_unmap_and_replace_legacy also redirects <new_addr> to the > + * null entryI am not following that statement. redirects <new_addr> to the null entry? What is a ''null entry''? Does that mean that the page table entry will have the value of zero? Or that it will have an PTE entry with the MFN of zero? The latter means one could actually still use the PTE but would get an surprise as many folks could be putting stuff in there.> * NOTES: > * 1. The call may fail in an undefined manner if either mapping is not > * tracked by <handle>. > * 2. After executing a batch of unmaps, it is guaranteed that no stale > * mappings will remain in the device or host TLBs. > */ > -#define GNTTABOP_unmap_and_replace 7 > +#define GNTTABOP_unmap_and_replace_legacy 7 > +#define GNTTABOP_unmap_and_replace 12 > struct gnttab_unmap_and_replace { > /* IN parameters. */ > uint64_t host_addr; > -- > 1.7.2.5 >
Stefano Stabellini
2013-Jul-23 16:53 UTC
Re: [PATCH 2/3] xen/grant_table: use the new GNTTABOP_unmap_and_replace hypercall
On Tue, 23 Jul 2013, Konrad Rzeszutek Wilk wrote:> On Sun, Jul 21, 2013 at 06:32:30PM +0100, Stefano Stabellini wrote: > > The current GNTTABOP_unmap_and_replace (7) is deprecated, rename > > it to GNTTABOP_unmap_and_replace_legacy. Introduce the new > > GNTTABOP_unmap_and_replace (12) and detect at boot time which one is > > available. > > > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > > CC: alex@alex.org.uk > > CC: dcrisan@flexiant.com > > --- > > drivers/xen/grant-table.c | 8 ++++++++ > > include/xen/grant_table.h | 1 + > > include/xen/interface/grant_table.h | 8 +++++--- > > 3 files changed, 14 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c > > index 04c1b2d..0f7d00b 100644 > > --- a/drivers/xen/grant-table.c > > +++ b/drivers/xen/grant-table.c > > @@ -65,6 +65,8 @@ static grant_ref_t gnttab_free_head; > > static DEFINE_SPINLOCK(gnttab_list_lock); > > unsigned long xen_hvm_resume_frames; > > EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); > > +int __read_mostly gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace; > > +EXPORT_SYMBOL_GPL(gnttabop_unmap_and_replace); > > > > static union { > > struct grant_entry_v1 *v1; > > @@ -1238,6 +1240,12 @@ int gnttab_init(void) > > gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; > > gnttab_free_head = NR_RESERVED_ENTRIES; > > > > + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, NULL, 1); > > + if (ret == -ENOSYS) { > > + gnttabop_unmap_and_replace = GNTTABOP_unmap_and_replace_legacy; > > + pr_warn("GNTTABOP_unmap_and_replace unavailable, falling back to the racy legacy version\n"); > > This would show up all the time on Xen 4.1 and Xen 3.6 - so a variety > of Cloud providers would get this. Do we get any in the field diagnostic > help with this chatty message?Yeah, I have removed it together with the proposal of the new hypercall.> > + } > > + > > printk("Grant table initialized\n"); > > return 0; > > > > diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h > > index 694dcaf..867d1a7 100644 > > --- a/include/xen/grant_table.h > > +++ b/include/xen/grant_table.h > > @@ -179,6 +179,7 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, > > void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); > > > > extern unsigned long xen_hvm_resume_frames; > > +extern int gnttabop_unmap_and_replace; > > unsigned int gnttab_max_grant_frames(void); > > > > #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) > > diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h > > index e40fae9..2a2b74f 100644 > > --- a/include/xen/interface/grant_table.h > > +++ b/include/xen/interface/grant_table.h > > @@ -408,15 +408,17 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); > > /* > > * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings > > * tracked by <handle> but atomically replace the page table entry with one > > - * pointing to the machine address under <new_addr>. <new_addr> will be > > - * redirected to the null entry. > > + * pointing to the machine address under <new_addr>. > > + * GNTTABOP_unmap_and_replace_legacy also redirects <new_addr> to the > > + * null entry > > I am not following that statement. redirects <new_addr> to the null entry? > What is a ''null entry''? > Does that mean that the page table entry will have > the value of zero?Yep> Or that it will have an PTE entry with the MFN of zero? > > The latter means one could actually still use the PTE but would get > an surprise as many folks could be putting stuff in there. > > > * NOTES: > > * 1. The call may fail in an undefined manner if either mapping is not > > * tracked by <handle>. > > * 2. After executing a batch of unmaps, it is guaranteed that no stale > > * mappings will remain in the device or host TLBs. > > */ > > -#define GNTTABOP_unmap_and_replace 7 > > +#define GNTTABOP_unmap_and_replace_legacy 7 > > +#define GNTTABOP_unmap_and_replace 12 > > struct gnttab_unmap_and_replace { > > /* IN parameters. */ > > uint64_t host_addr; > > -- > > 1.7.2.5 > > >