Anthony Liguori
2013-Nov-13 01:48 UTC
[PATCH] xen-gnttab: do not add m2p override entries for blkback mappings
From: Anthony Liguori <aliguori@amazon.com> Commit 5dc03639 switched blkback to also add m2p override entries when mapping grant pages but history seems to have forgotten why this is useful if it ever was. The blkback driver does not need m2p override entries to exist and there is significant overhead due to the locking in the m2p override table. We see about a 10% improvement in IOP rate with this patch applied running FIO in the guest. See http://article.gmane.org/gmane.linux.kernel/1590932 for a full analysis of current users. Signed-off-by: Anthony Liguori <aliguori@amazon.com> --- A backported version of this has been heavily tested but the testing against the latest Linux tree is light so far. --- drivers/block/xen-blkback/blkback.c | 12 ++++----- drivers/xen/gntdev.c | 4 +-- drivers/xen/grant-table.c | 49 ++++++++++++++++++++++++++++------- include/xen/grant_table.h | 14 ++++++++-- 4 files changed, 59 insertions(+), 20 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index bf4b9d2..53ea90e 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -286,7 +286,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); + segs_to_unmap, 0); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -322,7 +322,7 @@ static void unmap_purged_grants(struct work_struct *work) if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); + segs_to_unmap, 0); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -330,7 +330,7 @@ static void unmap_purged_grants(struct work_struct *work) kfree(persistent_gnt); } if (segs_to_unmap > 0) { - ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap, 0); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); } @@ -671,14 +671,14 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, pages[i]->handle = BLKBACK_INVALID_HANDLE; if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, - invcount); + invcount, 0); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); invcount = 0; } } if (invcount) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount, 0); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); } @@ -740,7 +740,7 @@ again: } if (segs_to_map) { - ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); + ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map, 0); BUG_ON(ret); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index e41c79c..9ab1792 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -285,7 +285,7 @@ static int map_grant_pages(struct grant_map *map) pr_debug("map %d+%d\n", map->index, map->count); err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, - map->pages, map->count); + map->pages, map->count, 1); if (err) return err; @@ -317,7 +317,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) err = gnttab_unmap_refs(map->unmap_ops + offset, use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, - pages); + pages, 1); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c4d2298..081be8d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -881,7 +881,8 @@ EXPORT_SYMBOL_GPL(gnttab_batch_copy); int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count, + int override) { int i, ret; bool lazy = false; @@ -918,10 +919,29 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, } else { mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } - ret = m2p_add_override(mfn, pages[i], kmap_ops ? - &kmap_ops[i] : NULL); - if (ret) - return ret; + + if (override) { + ret = m2p_add_override(mfn, pages[i], kmap_ops ? + &kmap_ops[i] : NULL); + if (ret) + return ret; + } else { + unsigned long pfn, old_mfn; + + pfn = page_to_pfn(pages[i]); + old_mfn = pfn_to_mfn(pfn); + + /* Save previous MFN in page private*/ + WARN_ON(PagePrivate(pages[i])); + SetPagePrivate(pages[i]); + set_page_private(pages[i], old_mfn); + + if (!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))) { + ret = -ENOMEM; + break; + } + } + } if (lazy) @@ -933,7 +953,8 @@ EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count, + int override) { int i, ret; bool lazy = false; @@ -951,10 +972,18 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i], kmap_ops ? - &kmap_ops[i] : NULL); - if (ret) - return ret; + if (override) { + ret = m2p_remove_override(pages[i], kmap_ops ? + &kmap_ops[i] : NULL); + if (ret) + return ret; + } else { + /* Restore saved MFN */ + WARN_ON(!PagePrivate(pages[i])); + set_phys_to_machine(page_to_pfn(pages[i]), + page_private(pages[i])); + ClearPagePrivate(pages[i]); + } } if (lazy) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 694dcaf..7fcb960 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -183,12 +183,22 @@ unsigned int gnttab_max_grant_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) +/* override is used to add m2p override table entries when mapping the + * grant references. Currently there are only two callers to this function, + * blkback and gntdev. gntdev needs all grant mappings to have corresponding + * m2p override table entries but blkback currently doesn''t. This is because + * blkback can gracefully handle the case where m2p(p2m(pfn)) fails with + * foreign pfns. If you cannot handle this correctly, you need to set + * override 1 when calling the map and unmap functions. + */ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count); + struct page **pages, unsigned int count, + int override); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kunmap_ops, - struct page **pages, unsigned int count); + struct page **pages, unsigned int count, + int override); /* Perform a batch of grant map/copy operations. Retry every batch slot * for which the hypervisor returns GNTST_eagain. This is typically due -- 1.7.9.5
Matt Wilson
2013-Nov-13 02:01 UTC
Re: [PATCH] xen-gnttab: do not add m2p override entries for blkback mappings
On Tue, Nov 12, 2013 at 05:48:56PM -0800, Anthony Liguori wrote:> From: Anthony Liguori <aliguori@amazon.com> > > Commit 5dc03639 switched blkback to also add m2p override entries > when mapping grant pages but history seems to have forgotten why > this is useful if it ever was. > > The blkback driver does not need m2p override entries to exist > and there is significant overhead due to the locking in the m2p > override table. We see about a 10% improvement in IOP rate with > this patch applied running FIO in the guest. > > See http://article.gmane.org/gmane.linux.kernel/1590932 for a full > analysis of current users. > > Signed-off-by: Anthony Liguori <aliguori@amazon.com>Reviewed-by: Matt Wilson <msw@amazon.com> One comment for discussion below.> --- > A backported version of this has been heavily tested but the testing > against the latest Linux tree is light so far.[...]> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c > index c4d2298..081be8d 100644 > --- a/drivers/xen/grant-table.c > +++ b/drivers/xen/grant-table.c > @@ -881,7 +881,8 @@ EXPORT_SYMBOL_GPL(gnttab_batch_copy); > > int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, > struct gnttab_map_grant_ref *kmap_ops, > - struct page **pages, unsigned int count) > + struct page **pages, unsigned int count, > + int override) > { > int i, ret; > bool lazy = false; > @@ -918,10 +919,29 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, > } else { > mfn = PFN_DOWN(map_ops[i].dev_bus_addr); > } > - ret = m2p_add_override(mfn, pages[i], kmap_ops ? > - &kmap_ops[i] : NULL); > - if (ret) > - return ret; > + > + if (override) { > + ret = m2p_add_override(mfn, pages[i], kmap_ops ? > + &kmap_ops[i] : NULL); > + if (ret) > + return ret;The original code, which remains unmodified by this patch, is returning without calling arch_leave_lazy_mmu_mode() at the bottom of this function.> + } else { > + unsigned long pfn, old_mfn; > + > + pfn = page_to_pfn(pages[i]); > + old_mfn = pfn_to_mfn(pfn); > + > + /* Save previous MFN in page private*/ > + WARN_ON(PagePrivate(pages[i])); > + SetPagePrivate(pages[i]); > + set_page_private(pages[i], old_mfn); > + > + if (!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))) { > + ret = -ENOMEM; > + break;The new path doesn''t repeat this error. I think that the override path needs to be changed to do the same in a separate patch. --msw> + } > + } > + > } > > if (lazy)arch_leave_lazy_mmu_mode(); return ret; }
David Vrabel
2013-Nov-13 10:36 UTC
Re: [PATCH] xen-gnttab: do not add m2p override entries for blkback mappings
On 13/11/13 01:48, Anthony Liguori wrote:> From: Anthony Liguori <aliguori@amazon.com> > > Commit 5dc03639 switched blkback to also add m2p override entries > when mapping grant pages but history seems to have forgotten why > this is useful if it ever was. > > The blkback driver does not need m2p override entries to exist > and there is significant overhead due to the locking in the m2p > override table. We see about a 10% improvement in IOP rate with > this patch applied running FIO in the guest. > > See http://article.gmane.org/gmane.linux.kernel/1590932 for a full > analysis of current users.I think it would be better if it was made clearer what m2p_add/remove_override() is for (i.e., allowing get_user_pages_fast() to work) so there isn''t this confusion in the future. Please add some comments for this and move the calls into the gntdev driver. In the future I would like to see the grant map/unmap done in m2p_add/remove_override() moved into gntdev as well, but this isn''t a requirement for this performance fix. As a prerequiste, the call to dma_mark_clean() in xen_swiotlb_unmap() needs to be removed and replaced with a comment explaining why this now differs from the generic implementation. i.e., the necessary phys_to_virt() will not work for foreign pages and dma_mark_clean() is a nop on all Xen supported architectures. David
Ian Campbell
2013-Nov-13 10:43 UTC
Re: [PATCH] xen-gnttab: do not add m2p override entries for blkback mappings
On Tue, 2013-11-12 at 17:48 -0800, Anthony Liguori wrote:> See http://article.gmane.org/gmane.linux.kernel/1590932 for a full > analysis of current users.Silly nit: Please can you include the message ID in case this goes away, or use a URL which includes the ID already so it can be translated if need be. I think LKML has a redirector thing you can use or if it went to xen-devel you can use http://bugs.xenproject.org/xen/mid/<message-id> Ian.
Stefano Stabellini
2013-Nov-13 13:13 UTC
Re: [PATCH] xen-gnttab: do not add m2p override entries for blkback mappings
On Wed, 13 Nov 2013, David Vrabel wrote:> On 13/11/13 01:48, Anthony Liguori wrote: > > From: Anthony Liguori <aliguori@amazon.com> > > > > Commit 5dc03639 switched blkback to also add m2p override entries > > when mapping grant pages but history seems to have forgotten why > > this is useful if it ever was. > > > > The blkback driver does not need m2p override entries to exist > > and there is significant overhead due to the locking in the m2p > > override table. We see about a 10% improvement in IOP rate with > > this patch applied running FIO in the guest. > > > > See http://article.gmane.org/gmane.linux.kernel/1590932 for a full > > analysis of current users. > > I think it would be better if it was made clearer what > m2p_add/remove_override() is for (i.e., allowing get_user_pages_fast() > to work) so there isn''t this confusion in the future. Please add some > comments for this and move the calls into the gntdev driver.Even though get_user_pages_fast is obviously the most important use case for the m2p_override, it is not the only one, see below.> As a prerequiste, the call to dma_mark_clean() in xen_swiotlb_unmap() > needs to be removed and replaced with a comment explaining why this now > differs from the generic implementation. i.e., the necessary > phys_to_virt() will not work for foreign pagesActually it is machine_to_phys that is not going to work and only on x86. machine_to_phys is going to be fine for foreign pages on ARM. Let''s be clear about this: without the m2p_override on x86 xen_swiotlb_unmap is going to fail and that undeniably is a mistake. However it is a mistake that happens not to cause any problems today. In fact the following comment is not right: +/* override is used to add m2p override table entries when mapping the + * grant references. Currently there are only two callers to this function, + * blkback and gntdev. gntdev needs all grant mappings to have corresponding + * m2p override table entries but blkback currently doesn''t. This is because + * blkback can gracefully handle the case where m2p(p2m(pfn)) fails with + * foreign pfns. If you cannot handle this correctly, you need to set + * override 1 when calling the map and unmap functions. + */ I would add the following comment to xen_unmap_single instead: /* On x86 xen_bus_to_phys is going to fail for foreign pages unless they * have been added to the m2p_override. Only gntdev currently adds them to * the m2p_override, while blkback does not. As a consequence any foreign * pages mapped by blkback and used for DMA are not going to be unmapped * correctly here. Fortunately this is not a problem because this function * is actually a nop for non-swiotlb pages on x86. */> and dma_mark_clean() is a nop on all Xen supported architectures.That''s true, it should be removed. It is just confusing.