Stefano Stabellini
2013-Aug-04 14:38 UTC
[PATCH v4 0/2] make ballooned out pages have a valid mapping at all times
Hi all,
this patch series limits problems caused by tcp retransmits on NFS when
the original block pages were mapped from a foreign domain and now the
mapping is gone.
It accomplishes the goal by:
1) mapping all ballooned out pages to a per-cpu
"balloon_scratch_page";
2) making sure that once a grant is unmapped, the original mapping to
the per-cpu balloon_scratch_page is restored atomically.
The first patch accomplishes (1), the second patch uses
GNTTABOP_unmap_and_replace to atomically unmap a grant and restore the
original mapping.
Changes in this version:
- add an early_initcall to clear all the possible per_cpu
balloon_scratch_page.
Stefano Stabellini (2):
xen/balloon: set a mapping for ballooned out pages
xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
arch/x86/xen/p2m.c | 22 ++++++++++-----
drivers/xen/balloon.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++--
drivers/xen/gntdev.c | 11 +------
include/xen/balloon.h | 3 ++
4 files changed, 86 insertions(+), 19 deletions(-)
git://git.kernel.org/pub/scm/linux/kernel/git/sstabellini/xen.git
valid_mapping_4
Cheers,
Stefano
Stefano Stabellini
2013-Aug-04 14:39 UTC
[PATCH v4 1/2] xen/balloon: set a mapping for ballooned out pages
Currently ballooned out pages are mapped to 0 and have INVALID_P2M_ENTRY
in the p2m. These ballooned out pages are used to map foreign grants
by gntdev and blkback (see alloc_xenballooned_pages).
Allocate a page per cpu and map all the ballooned out pages to the
corresponding mfn. Set the p2m accordingly. This way reading from a
ballooned out page won''t cause a kernel crash (see
http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html).
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
CC: alex@alex.org.uk
CC: dcrisan@flexiant.com
---
drivers/xen/balloon.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++--
include/xen/balloon.h | 3 ++
2 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 930fb68..d5ff74f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -36,6 +36,7 @@
* IN THE SOFTWARE.
*/
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
@@ -50,6 +51,7 @@
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -88,6 +90,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages() (totalhigh_pages++)
@@ -423,7 +427,8 @@ static enum bp_state decrease_reservation(unsigned long
nr_pages, gfp_t gfp)
if (xen_pv_domain() && !PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
+ pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+ PAGE_KERNEL_RO), 0);
BUG_ON(ret);
}
#endif
@@ -436,7 +441,8 @@ static enum bp_state decrease_reservation(unsigned long
nr_pages, gfp_t gfp)
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn,
+ pfn_to_mfn(page_to_pfn(__get_cpu_var(balloon_scratch_page))));
balloon_append(pfn_to_page(pfn));
}
@@ -491,6 +497,18 @@ static void balloon_process(struct work_struct *work)
mutex_unlock(&balloon_mutex);
}
+struct page *get_balloon_scratch_page(void)
+{
+ struct page *ret = get_cpu_var(balloon_scratch_page);
+ BUG_ON(ret == NULL);
+ return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+ put_cpu_var(balloon_scratch_page);
+}
+
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
@@ -584,13 +602,47 @@ static void __init balloon_add_region(unsigned long
start_pfn,
}
}
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (per_cpu(balloon_scratch_page, cpu) != NULL)
+ break;
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n",
cpu);
+ return NOTIFY_BAD;
+ }
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+ .notifier_call = balloon_cpu_notify,
+};
+
static int __init balloon_init(void)
{
- int i;
+ int i, cpu;
if (!xen_domain())
return -ENODEV;
+ for_each_online_cpu(cpu)
+ {
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n",
cpu);
+ return -ENOMEM;
+ }
+ }
+ register_cpu_notifier(&balloon_cpu_notifier);
+
pr_info("xen/balloon: Initialising balloon driver.\n");
balloon_stats.current_pages = xen_pv_domain()
@@ -627,4 +679,15 @@ static int __init balloon_init(void)
subsys_initcall(balloon_init);
+static int __init balloon_clear(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(balloon_scratch_page, cpu) = NULL;
+
+ return 0;
+}
+early_initcall(balloon_clear);
+
MODULE_LICENSE("GPL");
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7..a4c1c6a 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -29,6 +29,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page
**pages,
bool highmem);
void free_xenballooned_pages(int nr_pages, struct page **pages);
+struct page *get_balloon_scratch_page(void);
+void put_balloon_scratch_page(void);
+
struct device;
#ifdef CONFIG_XEN_SELFBALLOONING
extern int register_xen_selfballooning(struct device *dev);
--
1.7.2.5
Stefano Stabellini
2013-Aug-04 14:39 UTC
[PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0
mapping instead of reinstating the original mapping.
Doing so separately would be racy.
To unmap a grant and reinstate the original mapping atomically we use
GNTTABOP_unmap_and_replace.
GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so
don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping
passed in new_addr so we have to reinstate it, however that is a
per-cpu mapping only used for balloon scratch pages, so we can be sure that
it''s not going to be accessed while the mapping is not valid.
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
CC: alex@alex.org.uk
CC: dcrisan@flexiant.com
---
arch/x86/xen/p2m.c | 22 +++++++++++++++-------
drivers/xen/gntdev.c | 11 ++---------
2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa..0d4ec35 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,6 +161,7 @@
#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <xen/balloon.h>
#include <xen/grant_table.h>
#include "multicalls.h"
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page,
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs;
- struct gnttab_unmap_grant_ref *unmap_op;
+ struct gnttab_unmap_and_replace *unmap_op;
+ struct page *scratch_page = get_balloon_scratch_page();
+ unsigned long scratch_page_address = (unsigned long)
+ __va(page_to_pfn(scratch_page) << PAGE_SHIFT);
/*
* It might be that we queued all the m2p grant table
@@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page,
}
mcs = xen_mc_entry(
- sizeof(struct gnttab_unmap_grant_ref));
+ sizeof(struct gnttab_unmap_and_replace));
unmap_op = mcs.args;
unmap_op->host_addr = kmap_op->host_addr;
+ unmap_op->new_addr = scratch_page_address;
unmap_op->handle = kmap_op->handle;
- unmap_op->dev_bus_addr = 0;
MULTI_grant_table_op(mcs.mc,
- GNTTABOP_unmap_grant_ref, unmap_op, 1);
+ GNTTABOP_unmap_and_replace, unmap_op, 1);
xen_mc_issue(PARAVIRT_LAZY_MMU);
- set_pte_at(&init_mm, address, ptep,
- pfn_pte(pfn, PAGE_KERNEL));
- __flush_tlb_single(address);
+ mcs = __xen_mc_entry(0);
+ MULTI_update_va_mapping(mcs.mc, scratch_page_address,
+ pfn_pte(page_to_pfn(get_balloon_scratch_page()),
+ PAGE_KERNEL_RO), 0);
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
kmap_op->host_addr = 0;
+ put_balloon_scratch_page();
}
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 3c8803f..51f4c95 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -270,19 +270,12 @@ static int map_grant_pages(struct grant_map *map)
* with find_grant_ptes.
*/
for (i = 0; i < map->count; i++) {
- unsigned level;
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
- pte_t *ptep;
- u64 pte_maddr = 0;
BUG_ON(PageHighMem(map->pages[i]));
- ptep = lookup_address(address, &level);
- pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
- gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
- map->flags |
- GNTMAP_host_map |
- GNTMAP_contains_pte,
+ gnttab_set_map_op(&map->kmap_ops[i], address,
+ map->flags | GNTMAP_host_map,
map->grants[i].ref,
map->grants[i].domid);
}
--
1.7.2.5
David Vrabel
2013-Aug-05 14:53 UTC
Re: [PATCH v4 0/2] make ballooned out pages have a valid mapping at all times
On 04/08/13 15:38, Stefano Stabellini wrote:> Hi all, > this patch series limits problems caused by tcp retransmits on NFS when > the original block pages were mapped from a foreign domain and now the > mapping is gone. > > It accomplishes the goal by: > > 1) mapping all ballooned out pages to a per-cpu "balloon_scratch_page"; > 2) making sure that once a grant is unmapped, the original mapping to > the per-cpu balloon_scratch_page is restored atomically. > > The first patch accomplishes (1), the second patch uses > GNTTABOP_unmap_and_replace to atomically unmap a grant and restore the > original mapping. > > > > Changes in this version: > - add an early_initcall to clear all the possible per_cpu > balloon_scratch_page.I know Konrad asked for this but I don''t think it is necessary. Many other users of DEFINE_PER_CPU() assume the space is initialized to zero. e.g., cpufreq_show_table in drivers/cpufreq/freq_table.c If this is the only change in v4 I would suggest taking the v3 patches instead. David
Konrad Rzeszutek Wilk
2013-Aug-09 15:26 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote:> GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > mapping instead of reinstating the original mapping. > Doing so separately would be racy. > > To unmap a grant and reinstate the original mapping atomically we use > GNTTABOP_unmap_and_replace. > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > passed in new_addr so we have to reinstate it, however that is a > per-cpu mapping only used for balloon scratch pages, so we can be sure that > it''s not going to be accessed while the mapping is not valid.This looks to be depend on a new structure, which is not in Linux kernel? Are you missing a dependency patch? Shouldn''t we use some logic to figure out which hypercall to use if the hypervisor does not support it?> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > Reviewed-by: David Vrabel <david.vrabel@citrix.com> > CC: alex@alex.org.uk > CC: dcrisan@flexiant.com > --- > arch/x86/xen/p2m.c | 22 +++++++++++++++------- > drivers/xen/gntdev.c | 11 ++--------- > 2 files changed, 17 insertions(+), 16 deletions(-) > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > index 95fb2aa..0d4ec35 100644 > --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -161,6 +161,7 @@ > #include <asm/xen/page.h> > #include <asm/xen/hypercall.h> > #include <asm/xen/hypervisor.h> > +#include <xen/balloon.h> > #include <xen/grant_table.h> > > #include "multicalls.h" > @@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, > if (kmap_op != NULL) { > if (!PageHighMem(page)) { > struct multicall_space mcs; > - struct gnttab_unmap_grant_ref *unmap_op; > + struct gnttab_unmap_and_replace *unmap_op; > + struct page *scratch_page = get_balloon_scratch_page(); > + unsigned long scratch_page_address = (unsigned long) > + __va(page_to_pfn(scratch_page) << PAGE_SHIFT); > > /* > * It might be that we queued all the m2p grant table > @@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page, > } > > mcs = xen_mc_entry( > - sizeof(struct gnttab_unmap_grant_ref)); > + sizeof(struct gnttab_unmap_and_replace)); > unmap_op = mcs.args; > unmap_op->host_addr = kmap_op->host_addr; > + unmap_op->new_addr = scratch_page_address; > unmap_op->handle = kmap_op->handle; > - unmap_op->dev_bus_addr = 0; > > MULTI_grant_table_op(mcs.mc, > - GNTTABOP_unmap_grant_ref, unmap_op, 1); > + GNTTABOP_unmap_and_replace, unmap_op, 1); > > xen_mc_issue(PARAVIRT_LAZY_MMU); > > - set_pte_at(&init_mm, address, ptep, > - pfn_pte(pfn, PAGE_KERNEL)); > - __flush_tlb_single(address); > + mcs = __xen_mc_entry(0); > + MULTI_update_va_mapping(mcs.mc, scratch_page_address, > + pfn_pte(page_to_pfn(get_balloon_scratch_page()), > + PAGE_KERNEL_RO), 0); > + xen_mc_issue(PARAVIRT_LAZY_MMU); > + > kmap_op->host_addr = 0; > + put_balloon_scratch_page(); > } > } > > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index 3c8803f..51f4c95 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -270,19 +270,12 @@ static int map_grant_pages(struct grant_map *map) > * with find_grant_ptes. > */ > for (i = 0; i < map->count; i++) { > - unsigned level; > unsigned long address = (unsigned long) > pfn_to_kaddr(page_to_pfn(map->pages[i])); > - pte_t *ptep; > - u64 pte_maddr = 0; > BUG_ON(PageHighMem(map->pages[i])); > > - ptep = lookup_address(address, &level); > - pte_maddr = arbitrary_virt_to_machine(ptep).maddr; > - gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, > - map->flags | > - GNTMAP_host_map | > - GNTMAP_contains_pte, > + gnttab_set_map_op(&map->kmap_ops[i], address, > + map->flags | GNTMAP_host_map, > map->grants[i].ref, > map->grants[i].domid); > } > -- > 1.7.2.5 >
Stefano Stabellini
2013-Aug-13 11:17 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Fri, 9 Aug 2013, Konrad Rzeszutek Wilk wrote:> On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote: > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > mapping instead of reinstating the original mapping. > > Doing so separately would be racy. > > > > To unmap a grant and reinstate the original mapping atomically we use > > GNTTABOP_unmap_and_replace. > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > > passed in new_addr so we have to reinstate it, however that is a > > per-cpu mapping only used for balloon scratch pages, so we can be sure that > > it''s not going to be accessed while the mapping is not valid. > > This looks to be depend on a new structure, which is not in Linux kernel? > Are you missing a dependency patch?Nope, GNTTABOP_unmap_and_replace and struct gnttab_unmap_and_replace are already present in include/xen/interface/grant_table.h.> Shouldn''t we use some logic to figure out which hypercall to use if the > hypervisor does not support it?GNTTABOP_unmap_and_replace is not a new hypercall, it has been supported by Xen for a very long time. In a previous iteration of this patch series, I did introduce a new hypercall, but then I dropped it because I figured out that I could achieve the same thing with the existing hypercall.
Konrad Rzeszutek Wilk
2013-Aug-13 13:11 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Tue, Aug 13, 2013 at 12:17:18PM +0100, Stefano Stabellini wrote:> On Fri, 9 Aug 2013, Konrad Rzeszutek Wilk wrote: > > On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote: > > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > > mapping instead of reinstating the original mapping. > > > Doing so separately would be racy. > > > > > > To unmap a grant and reinstate the original mapping atomically we use > > > GNTTABOP_unmap_and_replace. > > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > > > passed in new_addr so we have to reinstate it, however that is a > > > per-cpu mapping only used for balloon scratch pages, so we can be sure that > > > it''s not going to be accessed while the mapping is not valid. > > > > This looks to be depend on a new structure, which is not in Linux kernel? > > Are you missing a dependency patch? > > Nope, GNTTABOP_unmap_and_replace and struct gnttab_unmap_and_replace are > already present in include/xen/interface/grant_table.h. > > > > Shouldn''t we use some logic to figure out which hypercall to use if the > > hypervisor does not support it? > > GNTTABOP_unmap_and_replace is not a new hypercall, it has been supported > by Xen for a very long time. > > In a previous iteration of this patch series, I did introduce a new > hypercall, but then I dropped it because I figured out that I could > achieve the same thing with the existing hypercall.OK, Please tack on: Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> P.S. If you could stick it on devel/for-linus-3.12 that would be super. Thanks!
Stefano Stabellini
2013-Aug-13 15:38 UTC
Re: [PATCH v4 2/2] xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
On Tue, 13 Aug 2013, Konrad Rzeszutek Wilk wrote:> On Tue, Aug 13, 2013 at 12:17:18PM +0100, Stefano Stabellini wrote: > > On Fri, 9 Aug 2013, Konrad Rzeszutek Wilk wrote: > > > On Sun, Aug 04, 2013 at 03:39:41PM +0100, Stefano Stabellini wrote: > > > > GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 > > > > mapping instead of reinstating the original mapping. > > > > Doing so separately would be racy. > > > > > > > > To unmap a grant and reinstate the original mapping atomically we use > > > > GNTTABOP_unmap_and_replace. > > > > GNTTABOP_unmap_and_replace doesn''t work with GNTMAP_contains_pte, so > > > > don''t use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping > > > > passed in new_addr so we have to reinstate it, however that is a > > > > per-cpu mapping only used for balloon scratch pages, so we can be sure that > > > > it''s not going to be accessed while the mapping is not valid. > > > > > > This looks to be depend on a new structure, which is not in Linux kernel? > > > Are you missing a dependency patch? > > > > Nope, GNTTABOP_unmap_and_replace and struct gnttab_unmap_and_replace are > > already present in include/xen/interface/grant_table.h. > > > > > > > Shouldn''t we use some logic to figure out which hypercall to use if the > > > hypervisor does not support it? > > > > GNTTABOP_unmap_and_replace is not a new hypercall, it has been supported > > by Xen for a very long time. > > > > In a previous iteration of this patch series, I did introduce a new > > hypercall, but then I dropped it because I figured out that I could > > achieve the same thing with the existing hypercall. > > OK, Please tack on: > > Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > P.S. > If you could stick it on devel/for-linus-3.12 that would be super. Thanks!done!