Wei Liu
2013-Aug-23 22:27 UTC
[PATCH] xenbus_client: extend interface to support multi-page ring
Originally Xen PV drivers only use single-page ring to pass along
information. This might limit the throughput between frontend and
backend.

The patch extends Xenbus driver to support multi-page ring, which in
general should improve throughput if ring is the bottleneck. Changes to
various frontend / backend to adapt to the new interface are also
included.

Affected Xen drivers:
* blkfront/back
* netfront/back
* pcifront/back
* tpmfront

The interface is documented, as before, in xenbus_client.c.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/block/xen-blkback/xenbus.c |   5 +-
 drivers/block/xen-blkfront.c       |   5 +-
 drivers/char/tpm/xen-tpmfront.c    |   5 +-
 drivers/net/xen-netback/netback.c  |   4 +-
 drivers/net/xen-netfront.c         |   9 +-
 drivers/pci/xen-pcifront.c         |   5 +-
 drivers/xen/xen-pciback/xenbus.c   |   2 +-
 drivers/xen/xenbus/xenbus_client.c | 386 ++++++++++++++++++++++++++----------
 include/xen/xenbus.h               |  19 +-
 9 files changed, 316 insertions(+), 124 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index fe5c3cd..df79278 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -176,7 +176,7 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
 			 unsigned int evtchn)
 {
 	int err;
@@ -185,7 +185,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
 	if (blkif->irq)
 		return 0;
 
-	err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+	err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 0,
+				     &blkif->blk_ring);
 	if (err < 0)
 		return err;
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a4660bb..8c186a0 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1144,6 +1144,7 @@ static int setup_blkring(struct xenbus_device *dev,
 {
 	struct blkif_sring *sring;
 	int err;
+	grant_ref_t gref;
 
 	info->ring_ref = GRANT_INVALID_REF;
 
@@ -1155,13 +1156,13 @@ static int setup_blkring(struct xenbus_device *dev,
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+	err = xenbus_grant_ring(dev, info->ring.sring, 0, &gref);
 	if (err < 0) {
 		free_page((unsigned long)sring);
 		info->ring.sring = NULL;
 		goto fail;
 	}
-	info->ring_ref = err;
+	info->ring_ref = gref;
 
 	err = xenbus_alloc_evtchn(dev, &info->evtchn);
 	if (err)
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 7a7929b..e5d47823 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -270,6 +270,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
 	struct xenbus_transaction xbt;
 	const char *message = NULL;
 	int rv;
+	grant_ref_t gref;
 
 	priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	if (!priv->shr) {
@@ -277,11 +278,11 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
 		return -ENOMEM;
 	}
 
-	rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr));
+	rv = xenbus_grant_ring(dev, &priv->shr, 0, &gref);
 	if (rv < 0)
 		return rv;
 
-	priv->ring_ref = rv;
+	priv->ring_ref = gref;
 
 	rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
 	if (rv)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 64828de..73eb928 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1856,7 +1856,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
 	int err = -ENOMEM;
 
 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-				     tx_ring_ref, &addr);
+				     &tx_ring_ref, 0, &addr);
 	if (err)
 		goto err;
 
@@ -1864,7 +1864,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
 	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
 
 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-				     rx_ring_ref, &addr);
+				     &rx_ring_ref, 0, &addr);
 	if (err)
 		goto err;
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 36808bf..5a784dc 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1593,6 +1593,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
 	int err;
 	struct net_device *netdev = info->netdev;
 	unsigned int feature_split_evtchn;
+	grant_ref_t gref;
 
 	info->tx_ring_ref = GRANT_INVALID_REF;
 	info->rx_ring_ref = GRANT_INVALID_REF;
@@ -1621,11 +1622,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
 	SHARED_RING_INIT(txs);
 	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
 
-	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+	err = xenbus_grant_ring(dev, txs, 0, &gref);
 	if (err < 0)
 		goto grant_tx_ring_fail;
-	info->tx_ring_ref = err;
+	info->tx_ring_ref = gref;
 
 	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
 	if (!rxs) {
 		err = -ENOMEM;
@@ -1635,10 +1636,10 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
 	SHARED_RING_INIT(rxs);
 	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
 
-	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+	err = xenbus_grant_ring(dev, rxs, 0, &gref);
 	if (err < 0)
 		goto grant_rx_ring_fail;
-	info->rx_ring_ref = err;
+	info->rx_ring_ref = gref;
 
 	if (feature_split_evtchn)
 		err = setup_netfront_split(info);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index f7197a7..efdde2f 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -771,12 +771,13 @@ static int pcifront_publish_info(struct pcifront_device *pdev)
 {
 	int err = 0;
 	struct xenbus_transaction trans;
+	grant_ref_t gref;
 
-	err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+	err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 0, &gref);
 	if (err < 0)
 		goto out;
 
-	pdev->gnt_ref = err;
+	pdev->gnt_ref = gref;
 
 	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
 	if (err)
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index a9ed867..644faf0 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -111,7 +111,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
 		 "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
 		 gnt_ref, remote_evtchn);
 
-	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+	err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 0, &vaddr);
 	if (err < 0) {
 		xenbus_dev_fatal(pdev->xdev, err,
 				 "Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index ec097d6..1065796 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -51,17 +51,25 @@
 struct xenbus_map_node {
 	struct list_head next;
 	union {
-		struct vm_struct *area; /* PV */
-		struct page *page;      /* HVM */
+		struct {
+			struct vm_struct *area;
+		} pv;
+		struct {
+			struct page *pages[XENBUS_MAX_RING_PAGES];
+			void *addr;
+		} hvm;
 	};
-	grant_handle_t handle;
+	grant_handle_t handles[XENBUS_MAX_RING_PAGES];
+	unsigned int nr_handles_order;
 };
 
 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);
 
 struct xenbus_ring_ops {
-	int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+	int (*map)(struct xenbus_device *dev,
+		   grant_ref_t *gnt_refs, unsigned int grefs_order,
+		   void **vaddr);
 	int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };
 
@@ -357,17 +365,40 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device. Return
- * 0 on success, or -errno on error. On error, the device will switch to
+ * @vaddr: starting virtual address of the ring
+ * @pages_order: order of pages to be granted
+ * @grefs: grant reference array to be filled in
+ *
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references. Return 0 on success, or
+ * -errno on error. On error, the device will switch to
  * XenbusStateClosing, and the error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+		      unsigned int pages_order, grant_ref_t *grefs)
 {
-	int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-	if (err < 0)
-		xenbus_dev_fatal(dev, err, "granting access to ring page");
+	int err;
+	int i;
+	unsigned int nr_pages = (1U << pages_order);
+
+	for (i = 0; i < nr_pages; i++) {
+		unsigned long addr = (unsigned long)vaddr +
+			(PAGE_SIZE * i);
+		err = gnttab_grant_foreign_access(dev->otherend_id,
+						  virt_to_mfn(addr), 0);
+		if (err < 0) {
+			xenbus_dev_fatal(dev, err,
+					 "granting access to ring page");
+			goto fail;
+		}
+		grefs[i] = err;
+	}
+
+	return 0;
+
+fail:
+	for (; i >= 0; i--)
+		gnttab_end_foreign_access_ref(grefs[i], 0);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -448,62 +479,128 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_refs: grant reference array
+ * @grefs_order: order of grant references
 * @vaddr: pointer to address to be filled out by mapping
 *
- * Based on Rusty Russell's skeleton driver's map_page.
- * Map a page of memory into this domain from another domain's grant table.
- * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
- * page to that address, and sets *vaddr to that address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
+ * Map 2^@grefs_order pages of memory into this domain from another
+ * domain's grant table. xenbus_map_ring_valloc allocates 2^@grefs_order
+ * pages of virtual address space, maps the pages to that address, and
+ * sets *vaddr to that address. Returns 0 on success, and GNTST_*
+ * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on
+ * error. If an error is returned, device will switch to
 * XenbusStateClosing and the error message will be saved in XenStore.
 */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+			   unsigned int grefs_order, void **vaddr)
 {
-	return ring_ops->map(dev, gnt_ref, vaddr);
+	return ring_ops->map(dev, gnt_refs, grefs_order, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
 
+static int __xenbus_map_ring(struct xenbus_device *dev,
+			     grant_ref_t *gnt_refs,
+			     unsigned int grefs_order,
+			     grant_handle_t *handles,
+			     void **addrs,
+			     unsigned int flags,
+			     bool *leaked)
+{
+	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES];
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
+	int i, j;
+	int err = GNTST_okay;
+	unsigned int nr_grefs = (1U << grefs_order);
+
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
+
+	for (i = 0; i < nr_grefs; i++) {
+		gnttab_set_map_op(&map[i], (unsigned long)addrs[i],
+				  GNTMAP_host_map | flags,
+				  gnt_refs[i],
+				  dev->otherend_id);
+		handles[i] = INVALID_GRANT_HANDLE;
+	}
+
+	gnttab_batch_map(map, i);
+
+	for (i = 0; i < nr_grefs; i++) {
+		if (map[i].status != GNTST_okay) {
+			err = map[i].status;
+			xenbus_dev_fatal(dev, map[i].status,
+					 "mapping in shared page %d from domain %d",
+					 gnt_refs[i], dev->otherend_id);
+			goto fail;
+		} else
+			handles[i] = map[i].handle;
+	}
+
+	return GNTST_okay;
+
+ fail:
+	for (i = j = 0; i < nr_grefs; i++) {
+		if (handles[i] != INVALID_GRANT_HANDLE) {
+			gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
+					    GNTMAP_host_map, handles[i]);
+			j++;
+		}
+	}
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
+		BUG();
+
+	*leaked = false;
+	for (i = 0; i < j; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			*leaked = true;
+			break;
+		}
+	}
+
+	return err;
+}
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-				     int gnt_ref, void **vaddr)
+				     grant_ref_t *gnt_refs,
+				     unsigned int grefs_order,
+				     void **vaddr)
 {
-	struct gnttab_map_grant_ref op = {
-		.flags = GNTMAP_host_map | GNTMAP_contains_pte,
-		.ref   = gnt_ref,
-		.dom   = dev->otherend_id,
-	};
 	struct xenbus_map_node *node;
 	struct vm_struct *area;
-	pte_t *pte;
+	pte_t *pte[XENBUS_MAX_RING_PAGES];
+	void *addrs[XENBUS_MAX_RING_PAGES];
+	int err = GNTST_okay;
+	int i;
+	bool leaked;
+	unsigned int nr_grefs = (1U << grefs_order);
 
 	*vaddr = NULL;
 
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
+
 	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
 
-	area = alloc_vm_area(PAGE_SIZE, &pte);
+	area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte);
 	if (!area) {
 		kfree(node);
 		return -ENOMEM;
 	}
 
-	op.host_addr = arbitrary_virt_to_machine(pte).maddr;
-
-	gnttab_batch_map(&op, 1);
+	for (i = 0; i < nr_grefs; i++)
+		addrs[i] = (void *)arbitrary_virt_to_machine(pte[i]).maddr;
 
-	if (op.status != GNTST_okay) {
-		free_vm_area(area);
-		kfree(node);
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-		return op.status;
-	}
+	err = __xenbus_map_ring(dev, gnt_refs, grefs_order, node->handles,
+				addrs, GNTMAP_contains_pte, &leaked);
+	if (err)
+		goto failed;
 
-	node->handle = op.handle;
-	node->area = area;
+	node->nr_handles_order = grefs_order;
+	node->pv.area = area;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_add(&node->next, &xenbus_valloc_pages);
@@ -511,14 +608,32 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 
 	*vaddr = area->addr;
 	return 0;
+
+failed:
+	if (!leaked)
+		free_vm_area(area);
+	else
+		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
+
+	kfree(node);
+	return err;
 }
 
 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-				      int gnt_ref, void **vaddr)
+				      grant_ref_t *gnt_ref,
+				      unsigned int grefs_order,
+				      void **vaddr)
 {
 	struct xenbus_map_node *node;
+	int i;
 	int err;
+	void *addrs[XENBUS_MAX_RING_PAGES];
 	void *addr;
+	bool leaked = false;
+	unsigned int nr_grefs = (1U << grefs_order);
+
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
 
 	*vaddr = NULL;
 
@@ -526,15 +641,29 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 	if (!node)
 		return -ENOMEM;
 
-	err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+	err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages,
+				       false /* lowmem */);
 	if (err)
 		goto out_err;
 
-	addr = pfn_to_kaddr(page_to_pfn(node->page));
+	for (i = 0; i < nr_grefs; i++)
+		addrs[i] = pfn_to_kaddr(page_to_pfn(node->hvm.pages[i]));
+
+	err = __xenbus_map_ring(dev, gnt_ref, grefs_order, node->handles,
+				addrs, 0 /* no flags */, &leaked);
+	node->nr_handles_order = grefs_order;
 
-	err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
 	if (err)
-		goto out_err_free_ballooned_pages;
+		goto out_free_ballooned_pages;
+
+	addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP,
+		    PAGE_KERNEL_IO);
+	if (!addr) {
+		err = -ENOMEM;
+		goto out_xenbus_unmap_ring;
+	}
+
+	node->hvm.addr = addr;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_add(&node->next, &xenbus_valloc_pages);
@@ -543,8 +672,16 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 	*vaddr = addr;
 	return 0;
 
- out_err_free_ballooned_pages:
-	free_xenballooned_pages(1, &node->page);
+ out_xenbus_unmap_ring:
+	if (!leaked)
+		xenbus_unmap_ring(dev, node->handles, node->nr_handles_order,
+				  addrs);
+	else
+		pr_alert("leaking %p size %u page(s)",
+			 addr, nr_grefs);
+ out_free_ballooned_pages:
+	if (!leaked)
+		free_xenballooned_pages(nr_grefs, node->hvm.pages);
 out_err:
 	kfree(node);
 	return err;
@@ -554,35 +691,33 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
- * @vaddr: address to be mapped to
+ * @gnt_refs: grant reference array
+ * @grefs_order: order of grant references
+ * @handles: pointer to grant handle array to be filled
+ * @vaddrs: addresses to be mapped to
+ * @leaked: set when a failed map could not be cleaned up; the caller
+ *          should then not free @vaddrs
 *
- * Map a page of memory into this domain from another domain's grant table.
+ * Map pages of memory into this domain from another domain's grant table.
 * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
 * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch
+ * to XenbusStateClosing and the first error message will be saved in XenStore.
+ * Furthermore, if mapping the ring fails, the caller should check @leaked.
+ * If @leaked is set, xenbus_map_ring failed to clean up and the caller
+ * should not free the address space of @vaddrs.
 */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-		    grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+		    unsigned int grefs_order, grant_handle_t *handles,
+		    void *vaddrs, bool *leaked)
 {
-	struct gnttab_map_grant_ref op;
+	int nr_grefs = 1U << grefs_order;
 
-	gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref,
-			  dev->otherend_id);
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
 
-	gnttab_batch_map(&op, 1);
-
-	if (op.status != GNTST_okay) {
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-	} else
-		*handle = op.handle;
-
-	return op.status;
+	return __xenbus_map_ring(dev, gnt_refs, grefs_order, handles,
+				 vaddrs, 0 /* no flags */, leaked);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);
 
@@ -592,8 +727,7 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
 * @dev: xenbus device
 * @vaddr: addr to unmap
 *
- * Based on Rusty Russell's skeleton driver's unmap_page.
- * Unmap a page of memory in this domain that was imported from another domain.
+ * Unmap memory in this domain that was imported from another domain.
 * Use xenbus_unmap_ring_vfree if you mapped in your memory with
 * xenbus_map_ring_valloc (it will free the virtual address space).
 * Returns 0 on success and returns GNTST_* on error
@@ -608,14 +742,16 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
 	struct xenbus_map_node *node;
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-	};
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
 	unsigned int level;
+	int i;
+	bool leaked = false;
+	int err;
+	unsigned int nr;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
-		if (node->area->addr == vaddr) {
+		if (node->pv.area->addr == vaddr) {
 			list_del(&node->next);
 			goto found;
 		}
@@ -630,22 +766,41 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 		return GNTST_bad_virt_addr;
 	}
 
-	op.handle = node->handle;
-	op.host_addr = arbitrary_virt_to_machine(
-		lookup_address((unsigned long)vaddr, &level)).maddr;
+	nr = (1U << node->nr_handles_order);
+	for (i = 0; i < nr; i++) {
+		unsigned long addr;
+
+		addr = (unsigned long)vaddr + (PAGE_SIZE * i);
+		unmap[i].host_addr = arbitrary_virt_to_machine(
+			lookup_address(addr, &level)).maddr;
+		unmap[i].dev_bus_addr = 0;
+		unmap[i].handle = node->handles[i];
+	}
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
 		BUG();
 
-	if (op.status == GNTST_okay)
-		free_vm_area(node->area);
+	err = GNTST_okay;
+	leaked = false;
+	for (i = 0; i < nr; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			leaked = true;
+			xenbus_dev_error(dev, unmap[i].status,
+					 "unmapping page at handle %d error %d",
+					 node->handles[i], unmap[i].status);
+			err = unmap[i].status;
+			break;
+		}
+	}
+
+	if (!leaked)
+		free_vm_area(node->pv.area);
 	else
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 node->handle, op.status);
+		pr_alert("leaking VM area %p size %u page(s)",
+			 node->pv.area, nr);
 
 	kfree(node);
-	return op.status;
+	return err;
 }
 
 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
@@ -653,10 +808,13 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 	int rv;
 	struct xenbus_map_node *node;
 	void *addr;
+	void *addrs[XENBUS_MAX_RING_PAGES];
+	unsigned int nr;
+	int i;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
-		addr = pfn_to_kaddr(page_to_pfn(node->page));
+		addr = node->hvm.addr;
 		if (addr == vaddr) {
 			list_del(&node->next);
 			goto found;
@@ -672,12 +830,18 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 		return GNTST_bad_virt_addr;
 	}
 
-	rv = xenbus_unmap_ring(dev, node->handle, addr);
+	nr = (1U << node->nr_handles_order);
+
+	for (i = 0; i < nr; i++)
+		addrs[i] = pfn_to_kaddr(page_to_pfn(node->hvm.pages[i]));
 
+	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles_order,
+			       addrs);
 	if (!rv)
-		free_xenballooned_pages(1, &node->page);
+		vunmap(vaddr);
 	else
-		WARN(1, "Leaking %p\n", vaddr);
+		WARN(1, "Leaking %p, size %u page(s)\n", vaddr,
+		     nr);
 
 	kfree(node);
 	return rv;
@@ -686,29 +850,45 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
 /**
  * xenbus_unmap_ring
  * @dev: xenbus device
- * @handle: grant handle
- * @vaddr: addr to unmap
+ * @handles: grant handle array
+ * @nr_handles_order: order of handles in the array
+ * @vaddrs: addresses to unmap
 *
- * Unmap a page of memory in this domain that was imported from another domain.
+ * Unmap memory in this domain that was imported from another domain.
 * Returns 0 on success and returns GNTST_* on error
 * (see xen/include/interface/grant_table.h).
 */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-		      grant_handle_t handle, void *vaddr)
+		      grant_handle_t *handles, unsigned int nr_handles_order,
+		      void **vaddrs)
 {
-	struct gnttab_unmap_grant_ref op;
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
+	int i;
+	int err;
+	unsigned int nr_handles = (1U << nr_handles_order);
 
-	gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle);
+	if (nr_handles > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+	for (i = 0; i < nr_handles; i++)
+		gnttab_set_unmap_op(&unmap[i], (unsigned long)vaddrs[i],
+				    GNTMAP_host_map, handles[i]);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
 		BUG();
 
-	if (op.status != GNTST_okay)
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 handle, op.status);
+	err = GNTST_okay;
+	for (i = 0; i < nr_handles; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			xenbus_dev_error(dev, unmap[i].status,
					 "unmapping page at handle %d error %d",
+					 handles[i], unmap[i].status);
+			err = unmap[i].status;
+			break;
+		}
+	}
 
-	return op.status;
+	return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
 
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 569c07f..dc0c3d6 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -46,6 +46,10 @@
 #include <xen/interface/io/xenbus.h>
 #include <xen/interface/io/xs_wire.h>
 
+#define XENBUS_MAX_RING_PAGE_ORDER 4
+#define XENBUS_MAX_RING_PAGES      (1U << XENBUS_MAX_RING_PAGE_ORDER)
+#define INVALID_GRANT_HANDLE       (~0U)
+
 /* Register callback to watch this node. */
 struct xenbus_watch
 {
@@ -196,15 +200,18 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
 			 const char *pathfmt, ...);
 
 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-			   int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-		    grant_handle_t *handle, void *vaddr);
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+		      unsigned int nr_pages, grant_ref_t *grefs);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+			   unsigned int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev,
+		    grant_ref_t *gnt_refs, unsigned int nr_grefs,
+		    grant_handle_t *handles, void *vaddr, bool *leaked);
 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-		      grant_handle_t handle, void *vaddr);
+		      grant_handle_t *handles, unsigned int nr_handles,
+		      void **vaddr);
 
 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
-- 
1.7.10.4
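[As a usage illustration, not part of the posted patch: a frontend setting up an order-1 (two-page) ring with the extended interface could look roughly like the sketch below. The "my" ring types (as produced by DEFINE_RING_TYPES), the allocation flags, and the xenstore key names mentioned in the comments are assumptions for illustration; only the xenbus_grant_ring() and xenbus_map_ring_valloc() prototypes come from the patch.]

/* Illustrative sketch only: grant an order-1 (two-page) shared ring.
 * Assumes a ring declared elsewhere with
 * DEFINE_RING_TYPES(my, struct my_request, struct my_response). */
static int setup_two_page_ring(struct xenbus_device *dev,
			       struct my_front_ring *front,
			       grant_ref_t grefs[XENBUS_MAX_RING_PAGES])
{
	unsigned int order = 1;		/* 2^1 = 2 pages */
	struct my_sring *sring;
	int err;

	/* The shared ring must be physically contiguous pages. */
	sring = (struct my_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
						    order);
	if (!sring)
		return -ENOMEM;

	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(front, sring, PAGE_SIZE << order);

	/* Grant every page of the ring; grefs[0..1] are filled in. */
	err = xenbus_grant_ring(dev, sring, order, grefs);
	if (err < 0) {
		free_pages((unsigned long)sring, order);
		front->sring = NULL;
		return err;
	}

	/* The frontend would then advertise the order and one
	 * hypothetical "ring-ref%u" key per gref in xenstore; the
	 * backend maps them in one call with
	 * xenbus_map_ring_valloc(dev, grefs, order, &vaddr). */
	return 0;
}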
Ian Campbell
2013-Aug-27 08:23 UTC
Re: [PATCH] xenbus_client: extend interface to support multi-page ring
On Fri, 2013-08-23 at 23:27 +0100, Wei Liu wrote:
> Originally Xen PV drivers only use single-page ring to pass along
> information. This might limit the throughput between frontend and
> backend.
>
> The patch extends Xenbus driver to support multi-page ring, which in
> general should improve throughput if ring is the bottleneck. Changes to
> various frontend / backend to adapt to the new interface are also
> included.

The changes to the front/backends here simply adapt to the new
interface but don't actually make use of it, right?

> @@ -196,15 +200,18 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
>  			 const char *pathfmt, ...);
>
>  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> -			   int gnt_ref, void **vaddr);
> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> -		    grant_handle_t *handle, void *vaddr);
> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> +		      unsigned int nr_pages, grant_ref_t *grefs);
> +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
> +			   unsigned int nr_grefs, void **vaddr);

In the definition nr_grefs is called grefs_order. From the usage I think
the definition and not this prototype is correct.

Ian.
Wei Liu
2013-Aug-27 08:25 UTC
Re: [PATCH] xenbus_client: extend interface to support multi-page ring
On Tue, Aug 27, 2013 at 09:23:24AM +0100, Ian Campbell wrote:
> On Fri, 2013-08-23 at 23:27 +0100, Wei Liu wrote:
> > Originally Xen PV drivers only use single-page ring to pass along
> > information. This might limit the throughput between frontend and
> > backend.
> >
> > The patch extends Xenbus driver to support multi-page ring, which in
> > general should improve throughput if ring is the bottleneck. Changes to
> > various frontend / backend to adapt to the new interface are also
> > included.
>
> The changes to the front/backends here simply adapt to the new
> interface but don't actually make use of it, right?
>

No, they don't.

> > @@ -196,15 +200,18 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
> >  			 const char *pathfmt, ...);
> >
> >  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> > -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> > -			   int gnt_ref, void **vaddr);
> > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> > -		    grant_handle_t *handle, void *vaddr);
> > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> > +		      unsigned int nr_pages, grant_ref_t *grefs);
> > +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
> > +			   unsigned int nr_grefs, void **vaddr);
>
> In the definition nr_grefs is called grefs_order. From the usage I think
> the definition and not this prototype is correct.
>

Good catch, thanks.

Wei.

> Ian.
Konrad Rzeszutek Wilk
2013-Aug-27 14:12 UTC
Re: [PATCH] xenbus_client: extend interface to support multi-page ring
On Tue, Aug 27, 2013 at 09:25:42AM +0100, Wei Liu wrote:
> On Tue, Aug 27, 2013 at 09:23:24AM +0100, Ian Campbell wrote:
> > On Fri, 2013-08-23 at 23:27 +0100, Wei Liu wrote:
> > > Originally Xen PV drivers only use single-page ring to pass along
> > > information. This might limit the throughput between frontend and
> > > backend.
> > >
> > > The patch extends Xenbus driver to support multi-page ring, which in
> > > general should improve throughput if ring is the bottleneck. Changes to
> > > various frontend / backend to adapt to the new interface are also
> > > included.
> >
> > The changes to the front/backends here simply adapt to the new
> > interface but don't actually make use of it, right?
> >
>
> No, they don't.

I think the scsi front or usb front (in a different branch) did use it.
That is why I remember seeing something similar to this.

Either way, I am comfortable putting this in - will make it easier for
folks to experiment with enlarging the ring.

> > > @@ -196,15 +200,18 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
> > >  			 const char *pathfmt, ...);
> > >
> > >  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> > > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> > > -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> > > -			   int gnt_ref, void **vaddr);
> > > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> > > -		    grant_handle_t *handle, void *vaddr);
> > > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> > > +		      unsigned int nr_pages, grant_ref_t *grefs);
> > > +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
> > > +			   unsigned int nr_grefs, void **vaddr);
> >
> > In the definition nr_grefs is called grefs_order. From the usage I think
> > the definition and not this prototype is correct.
> >
>
> Good catch, thanks.
>
> Wei.
>
> > Ian.
Wei Liu
2013-Aug-27 14:17 UTC
Re: [PATCH] xenbus_client: extend interface to support multi-page ring
On Tue, Aug 27, 2013 at 10:12:07AM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Aug 27, 2013 at 09:25:42AM +0100, Wei Liu wrote:
> > On Tue, Aug 27, 2013 at 09:23:24AM +0100, Ian Campbell wrote:
> > > On Fri, 2013-08-23 at 23:27 +0100, Wei Liu wrote:
> > > > Originally Xen PV drivers only use single-page ring to pass along
> > > > information. This might limit the throughput between frontend and
> > > > backend.
> > > >
> > > > The patch extends Xenbus driver to support multi-page ring, which in
> > > > general should improve throughput if ring is the bottleneck. Changes to
> > > > various frontend / backend to adapt to the new interface are also
> > > > included.
> > >
> > > The changes to the front/backends here simply adapt to the new
> > > interface but don't actually make use of it, right?
> > >
> >
> > No, they don't.
>
> I think the scsi front or usb front (in a different branch) did use it.
> That is why I remember seeing something similar to this.
>

Oh, those are not upstreamed.

> Either way, I am comfortable putting this in - will make it easier for
> folks to experiment with enlarging the ring.
>

Right. I will post another version when I collect enough comments...

Wei.

> > > > @@ -196,15 +200,18 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
> > > >  			 const char *pathfmt, ...);
> > > >
> > > >  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
> > > > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
> > > > -int xenbus_map_ring_valloc(struct xenbus_device *dev,
> > > > -			   int gnt_ref, void **vaddr);
> > > > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
> > > > -		    grant_handle_t *handle, void *vaddr);
> > > > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
> > > > +		      unsigned int nr_pages, grant_ref_t *grefs);
> > > > +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
> > > > +			   unsigned int nr_grefs, void **vaddr);
> > >
> > > In the definition nr_grefs is called grefs_order. From the usage I think
> > > the definition and not this prototype is correct.
> > >
> >
> > Good catch, thanks.
> >
> > Wei.
> >
> > > Ian.
David Vrabel
2013-Aug-27 14:26 UTC
Re: [PATCH] xenbus_client: extend interface to support multi-page ring
On 23/08/13 23:27, Wei Liu wrote:
> Originally Xen PV drivers only use single-page ring to pass along
> information. This might limit the throughput between frontend and
> backend.

Why does the number of pages in the ring have to be a power of two?
Should we be able to handle any number of pages?

David
Wei Liu
2013-Aug-27 14:42 UTC
Re: [PATCH] xenbus_client: extend interface to support multi-page ring
On Tue, Aug 27, 2013 at 03:26:48PM +0100, David Vrabel wrote:
> On 23/08/13 23:27, Wei Liu wrote:
> > Originally Xen PV drivers only use single-page ring to pass along
> > information. This might limit the throughput between frontend and
> > backend.
>
> Why does the number of pages in the ring have to be a power of two?
> Should we be able to handle any number of pages?
>

No, it doesn't have to be. There is no problem changing the interface
to handle any number of pages, provided the number of pages is
<= XENBUS_MAX_RING_PAGES. In fact my first version was like that. :-)

I changed to a power of two because someone suggested it at the
Hackathon, but that was a fairly arbitrary suggestion, so it is safe
to ignore.

On further thought, using an arbitrary number would be more
space-efficient. I will change to that.

Wei.

> David
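[For illustration: the arbitrary-page-count variant discussed above could look roughly like the sketch below, essentially the grant loop from the patch with nr_pages passed directly instead of an order, so a 3-page ring no longer has to be rounded up to 4. This is an assumption about a possible follow-up version, not code from the posted patch.]

/* Hypothetical follow-up interface: take nr_pages directly, so a
 * 3-page ring does not need to be rounded up to 4 (order 2). */
int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
		      unsigned int nr_pages, grant_ref_t *grefs)
{
	int err, i;

	if (nr_pages > XENBUS_MAX_RING_PAGES)
		return -EINVAL;

	for (i = 0; i < nr_pages; i++) {
		unsigned long addr = (unsigned long)vaddr + PAGE_SIZE * i;

		err = gnttab_grant_foreign_access(dev->otherend_id,
						  virt_to_mfn(addr), 0);
		if (err < 0)
			goto fail;
		grefs[i] = err;
	}

	return 0;

fail:
	/* Revoke only the grants made so far. */
	while (--i >= 0)
		gnttab_end_foreign_access_ref(grefs[i], 0);
	return err;
}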