Wei Liu
2013-Nov-10  17:21 UTC
[PATCH v5] xenbus_client: extend interface to suppurt multi-page ring
Originally Xen PV drivers only use single-page ring to pass along
information. This might limit the throughput between frontend and
backend.
The patch extends Xenbus driver to support multi-page ring, which in
general should improve throughput if ring is the bottleneck. Changes to
various frontend / backend to adapt to the new interface are also
included.
Affected Xen drivers:
* blkfront/back
* netfront/back
* pcifront/back
* tpmfront
The interface is documented, as before, in xenbus_client.c.
Change in V2:
* allow ring has arbitrary number of pages <= XENBUS_MAX_RING_PAGES
Change in V3:
* update function prototypes
* carefully deal with types of different sizes
Change in V4:
* use PAGE_KERNEL instead of PAGE_KERNEL_IO to avoid breakage on Arm
Change in V5:
* fix off-by-one error and other minor glitches spotted by Mathew Daley
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Konrad Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/block/xen-blkback/xenbus.c |    5 +-
 drivers/block/xen-blkfront.c       |    5 +-
 drivers/char/tpm/xen-tpmfront.c    |    5 +-
 drivers/net/xen-netback/netback.c  |    4 +-
 drivers/net/xen-netfront.c         |    9 +-
 drivers/pci/xen-pcifront.c         |    5 +-
 drivers/xen/xen-pciback/xenbus.c   |    2 +-
 drivers/xen/xenbus/xenbus_client.c |  388 ++++++++++++++++++++++++++----------
 include/xen/xenbus.h               |   20 +-
 9 files changed, 320 insertions(+), 123 deletions(-)
diff --git a/drivers/block/xen-blkback/xenbus.c
b/drivers/block/xen-blkback/xenbus.c
index c2014a0..eed33c8 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -176,7 +176,7 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
 			 unsigned int evtchn)
 {
 	int err;
@@ -185,7 +185,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned
long shared_page,
 	if (blkif->irq)
 		return 0;
 
-	err = xenbus_map_ring_valloc(blkif->be->dev, shared_page,
&blkif->blk_ring);
+	err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+				     &blkif->blk_ring);
 	if (err < 0)
 		return err;
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a4660bb..ba0bc58 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1144,6 +1144,7 @@ static int setup_blkring(struct xenbus_device *dev,
 {
 	struct blkif_sring *sring;
 	int err;
+	grant_ref_t gref;
 
 	info->ring_ref = GRANT_INVALID_REF;
 
@@ -1155,13 +1156,13 @@ static int setup_blkring(struct xenbus_device *dev,
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+	err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
 	if (err < 0) {
 		free_page((unsigned long)sring);
 		info->ring.sring = NULL;
 		goto fail;
 	}
-	info->ring_ref = err;
+	info->ring_ref = gref;
 
 	err = xenbus_alloc_evtchn(dev, &info->evtchn);
 	if (err)
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 94c280d..c90ab4f 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -235,6 +235,7 @@ static int setup_ring(struct xenbus_device *dev, struct
tpm_private *priv)
 	struct xenbus_transaction xbt;
 	const char *message = NULL;
 	int rv;
+	grant_ref_t gref;
 
 	priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	if (!priv->shr) {
@@ -242,11 +243,11 @@ static int setup_ring(struct xenbus_device *dev, struct
tpm_private *priv)
 		return -ENOMEM;
 	}
 
-	rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr));
+	rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref);
 	if (rv < 0)
 		return rv;
 
-	priv->ring_ref = rv;
+	priv->ring_ref = gref;
 
 	rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
 	if (rv)
diff --git a/drivers/net/xen-netback/netback.c
b/drivers/net/xen-netback/netback.c
index f3e591c..acedb352 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1610,7 +1610,7 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
 	int err = -ENOMEM;
 
 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-				     tx_ring_ref, &addr);
+				     &tx_ring_ref, 1, &addr);
 	if (err)
 		goto err;
 
@@ -1618,7 +1618,7 @@ int xenvif_map_frontend_rings(struct xenvif *vif,
 	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
 
 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
-				     rx_ring_ref, &addr);
+				     &rx_ring_ref, 1, &addr);
 	if (err)
 		goto err;
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 36808bf..0ddca5a9 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1593,6 +1593,7 @@ static int setup_netfront(struct xenbus_device *dev,
struct netfront_info *info)
 	int err;
 	struct net_device *netdev = info->netdev;
 	unsigned int feature_split_evtchn;
+	grant_ref_t gref;
 
 	info->tx_ring_ref = GRANT_INVALID_REF;
 	info->rx_ring_ref = GRANT_INVALID_REF;
@@ -1621,11 +1622,11 @@ static int setup_netfront(struct xenbus_device *dev,
struct netfront_info *info)
 	SHARED_RING_INIT(txs);
 	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
 
-	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+	err = xenbus_grant_ring(dev, txs, 1, &gref);
 	if (err < 0)
 		goto grant_tx_ring_fail;
 
-	info->tx_ring_ref = err;
+	info->tx_ring_ref = gref;
 	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
 	if (!rxs) {
 		err = -ENOMEM;
@@ -1635,10 +1636,10 @@ static int setup_netfront(struct xenbus_device *dev,
struct netfront_info *info)
 	SHARED_RING_INIT(rxs);
 	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
 
-	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+	err = xenbus_grant_ring(dev, rxs, 1, &gref);
 	if (err < 0)
 		goto grant_rx_ring_fail;
-	info->rx_ring_ref = err;
+	info->rx_ring_ref = gref;
 
 	if (feature_split_evtchn)
 		err = setup_netfront_split(info);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index f7197a7..c35a44d 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -771,12 +771,13 @@ static int pcifront_publish_info(struct pcifront_device
*pdev)
 {
 	int err = 0;
 	struct xenbus_transaction trans;
+	grant_ref_t gref;
 
-	err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+	err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &gref);
 	if (err < 0)
 		goto out;
 
-	pdev->gnt_ref = err;
+	pdev->gnt_ref = gref;
 
 	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
 	if (err)
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index a9ed867..a7fd6f5 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -111,7 +111,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device
*pdev, int gnt_ref,
 		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
 		gnt_ref, remote_evtchn);
 
-	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+	err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
 	if (err < 0) {
 		xenbus_dev_fatal(pdev->xdev, err,
 				"Error mapping other domain page in ours.");
diff --git a/drivers/xen/xenbus/xenbus_client.c
b/drivers/xen/xenbus/xenbus_client.c
index ec097d6..fd7cd83 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -51,17 +51,25 @@
 struct xenbus_map_node {
 	struct list_head next;
 	union {
-		struct vm_struct *area; /* PV */
-		struct page *page;     /* HVM */
+		struct {
+			struct vm_struct *area;
+		} pv;
+		struct {
+			struct page *pages[XENBUS_MAX_RING_PAGES];
+			void *addr;
+		} hvm;
 	};
-	grant_handle_t handle;
+	grant_handle_t handles[XENBUS_MAX_RING_PAGES];
+	unsigned int   nr_handles;
 };
 
 static DEFINE_SPINLOCK(xenbus_valloc_lock);
 static LIST_HEAD(xenbus_valloc_pages);
 
 struct xenbus_ring_ops {
-	int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+	int (*map)(struct xenbus_device *dev,
+		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
+		   void **vaddr);
 	int (*unmap)(struct xenbus_device *dev, void *vaddr);
 };
 
@@ -357,17 +365,39 @@ static void xenbus_switch_fatal(struct xenbus_device *dev,
int depth, int err,
 /**
  * xenbus_grant_ring
  * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
+ * @vaddr: starting virtual address of the ring
+ * @nr_pages: number of pages to be granted
+ * @grefs: grant reference array to be filled in
+ *
+ * Grant access to the given @vaddr to the peer of the given device.
+ * Then fill in @grefs with grant references.  Return 0 on success, or
+ * -errno on error.  On error, the device will switch to
  * XenbusStateClosing, and the error will be saved in the store.
  */
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+		      unsigned int nr_pages, grant_ref_t *grefs)
 {
-	int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
-	if (err < 0)
-		xenbus_dev_fatal(dev, err, "granting access to ring page");
+	int err;
+	int i, j;
+
+	for (i = 0; i < nr_pages; i++) {
+		unsigned long addr = (unsigned long)vaddr +
+			(PAGE_SIZE * i);
+		err = gnttab_grant_foreign_access(dev->otherend_id,
+						  virt_to_mfn(addr), 0);
+		if (err < 0) {
+			xenbus_dev_fatal(dev, err,
+					 "granting access to ring page");
+			goto fail;
+		}
+		grefs[i] = err;
+	}
+
+	return 0;
+
+fail:
+	for (j = 0; j < i; j++)
+		gnttab_end_foreign_access_ref(grefs[j], 0);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
@@ -448,62 +478,130 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 /**
  * xenbus_map_ring_valloc
  * @dev: xenbus device
- * @gnt_ref: grant reference
+ * @gnt_refs: grant reference array
+ * @nr_grefs: number of grant references
  * @vaddr: pointer to address to be filled out by mapping
  *
- * Based on Rusty Russell''s skeleton driver''s map_page.
- * Map a page of memory into this domain from another domain''s grant
table.
- * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
- * page to that address, and sets *vaddr to that address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
+ * Map @nr_grefs pages of memory into this domain from another
+ * domain''s grant table.  xenbus_map_ring_valloc allocates @nr_grefs
+ * pages of virtual address space, maps the pages to that address, and
+ * sets *vaddr to that address.  Returns 0 on success, and GNTST_*
+ * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on
+ * error. If an error is returned, device will switch to
  * XenbusStateClosing and the error message will be saved in XenStore.
  */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void
**vaddr)
+int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+			   unsigned int nr_grefs, void **vaddr)
 {
-	return ring_ops->map(dev, gnt_ref, vaddr);
+	return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
 
+/* N.B. sizeof(phys_addr_t) doesn''t always equal to sizeof(unsigned
+ * long), e.g. 32-on-64.  Caller is responsible for preparing the
+ * right array to feed into this function */
+static int __xenbus_map_ring(struct xenbus_device *dev,
+			     grant_ref_t *gnt_refs,
+			     unsigned int nr_grefs,
+			     grant_handle_t *handles,
+			     phys_addr_t *addrs,
+			     unsigned int flags,
+			     bool *leaked)
+{
+	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES];
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
+	int i, j;
+	int err = GNTST_okay;
+
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
+
+	for (i = 0; i < nr_grefs; i++) {
+		memset(&map[i], 0, sizeof(map[i]));
+		gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
+				  dev->otherend_id);
+		handles[i] = INVALID_GRANT_HANDLE;
+	}
+
+	gnttab_batch_map(map, i);
+
+	for (i = 0; i < nr_grefs; i++) {
+		if (map[i].status != GNTST_okay) {
+			err = map[i].status;
+			xenbus_dev_fatal(dev, map[i].status,
+					 "mapping in shared page %d from domain %d",
+					 gnt_refs[i], dev->otherend_id);
+			goto fail;
+		} else
+			handles[i] = map[i].handle;
+	}
+
+	return GNTST_okay;
+
+ fail:
+	for (i = j = 0; i < nr_grefs; i++) {
+		if (handles[i] != INVALID_GRANT_HANDLE) {
+			memset(&unmap[j], 0, sizeof(unmap[j]));
+			gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
+					    GNTMAP_host_map, handles[i]);
+			j++;
+		}
+	}
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
+		BUG();
+
+	*leaked = false;
+	for (i = 0; i < j; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			*leaked = true;
+			break;
+		}
+	}
+
+	return err;
+}
+
 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-				     int gnt_ref, void **vaddr)
+				     grant_ref_t *gnt_refs,
+				     unsigned int nr_grefs,
+				     void **vaddr)
 {
-	struct gnttab_map_grant_ref op = {
-		.flags = GNTMAP_host_map | GNTMAP_contains_pte,
-		.ref   = gnt_ref,
-		.dom   = dev->otherend_id,
-	};
 	struct xenbus_map_node *node;
 	struct vm_struct *area;
-	pte_t *pte;
+	pte_t *ptes[XENBUS_MAX_RING_PAGES];
+	phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
+	int err = GNTST_okay;
+	int i;
+	bool leaked;
 
 	*vaddr = NULL;
 
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
+
 	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
 
-	area = alloc_vm_area(PAGE_SIZE, &pte);
+	area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes);
 	if (!area) {
 		kfree(node);
 		return -ENOMEM;
 	}
 
-	op.host_addr = arbitrary_virt_to_machine(pte).maddr;
-
-	gnttab_batch_map(&op, 1);
+	for (i = 0; i < nr_grefs; i++)
+		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
 
-	if (op.status != GNTST_okay) {
-		free_vm_area(area);
-		kfree(node);
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-		return op.status;
-	}
+	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
+				phys_addrs,
+				GNTMAP_host_map | GNTMAP_contains_pte,
+				&leaked);
+	if (err)
+		goto failed;
 
-	node->handle = op.handle;
-	node->area = area;
+	node->nr_handles = nr_grefs;
+	node->pv.area = area;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_add(&node->next, &xenbus_valloc_pages);
@@ -511,14 +609,33 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device
*dev,
 
 	*vaddr = area->addr;
 	return 0;
+
+failed:
+	if (!leaked)
+		free_vm_area(area);
+	else
+		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
+
+	kfree(node);
+	return err;
 }
 
 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
-				      int gnt_ref, void **vaddr)
+				      grant_ref_t *gnt_ref,
+				      unsigned int nr_grefs,
+				      void **vaddr)
 {
 	struct xenbus_map_node *node;
+	int i;
 	int err;
 	void *addr;
+	bool leaked = false;
+	/* Why do we need two arrays? See comment of __xenbus_map_ring */
+	phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
+	unsigned long addrs[XENBUS_MAX_RING_PAGES];
+
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
 
 	*vaddr = NULL;
 
@@ -526,15 +643,32 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device
*dev,
 	if (!node)
 		return -ENOMEM;
 
-	err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+	err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages,
+				       false /* lowmem */);
 	if (err)
 		goto out_err;
 
-	addr = pfn_to_kaddr(page_to_pfn(node->page));
+	for (i = 0; i < nr_grefs; i++) {
+		unsigned long pfn = page_to_pfn(node->hvm.pages[i]);
+		phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn);
+		addrs[i] = (unsigned long)pfn_to_kaddr(pfn);
+	}
+
+	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
+				phys_addrs, GNTMAP_host_map, &leaked);
+	node->nr_handles = nr_grefs;
 
-	err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
 	if (err)
-		goto out_err_free_ballooned_pages;
+		goto out_free_ballooned_pages;
+
+	addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP,
+		    PAGE_KERNEL);
+	if (!addr) {
+		err = -ENOMEM;
+		goto out_xenbus_unmap_ring;
+	}
+
+	node->hvm.addr = addr;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_add(&node->next, &xenbus_valloc_pages);
@@ -543,8 +677,16 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device
*dev,
 	*vaddr = addr;
 	return 0;
 
- out_err_free_ballooned_pages:
-	free_xenballooned_pages(1, &node->page);
+ out_xenbus_unmap_ring:
+	if (!leaked)
+		xenbus_unmap_ring(dev, node->handles, node->nr_handles,
+				  addrs);
+	else
+		pr_alert("leaking %p size %u page(s)",
+			 addr, nr_grefs);
+ out_free_ballooned_pages:
+	if (!leaked)
+		free_xenballooned_pages(nr_grefs, node->hvm.pages);
  out_err:
 	kfree(node);
 	return err;
@@ -554,35 +696,37 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device
*dev,
 /**
  * xenbus_map_ring
  * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
- * @vaddr: address to be mapped to
+ * @gnt_refs: grant reference array
+ * @nr_grefs: number of grant reference
+ * @handles: pointer to grant handle to be filled
+ * @vaddrs: addresses to be mapped to
+ * @leaked: fail to clean up a failed map, caller should not free vaddr
  *
- * Map a page of memory into this domain from another domain''s grant
table.
+ * Map pages of memory into this domain from another domain''s grant
table.
  * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
+ * this yourself!). It only maps in the pages to the specified address.
  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
+ * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch
to
+ * XenbusStateClosing and the first error message will be saved in XenStore.
+ * Further more if we fail to map the ring, caller should check @leaked.
+ * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller
+ * should not free the address space of @vaddr.
  */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-		    grant_handle_t *handle, void *vaddr)
+int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+		    unsigned int nr_grefs, grant_handle_t *handles,
+		    unsigned long *vaddrs, bool *leaked)
 {
-	struct gnttab_map_grant_ref op;
+	phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
+	int i;
 
-	gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref,
-			  dev->otherend_id);
+	if (nr_grefs > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
 
-	gnttab_batch_map(&op, 1);
+	for (i = 0; i < nr_grefs; i++)
+		phys_addrs[i] = (unsigned long)vaddrs[i];
 
-	if (op.status != GNTST_okay) {
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-	} else
-		*handle = op.handle;
-
-	return op.status;
+	return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
+				 phys_addrs, GNTMAP_host_map, leaked);
 }
 EXPORT_SYMBOL_GPL(xenbus_map_ring);
 
@@ -592,8 +736,7 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
  * @dev: xenbus device
  * @vaddr: addr to unmap
  *
- * Based on Rusty Russell''s skeleton driver''s unmap_page.
- * Unmap a page of memory in this domain that was imported from another domain.
+ * Unmap memory in this domain that was imported from another domain.
  * Use xenbus_unmap_ring_vfree if you mapped in your memory with
  * xenbus_map_ring_valloc (it will free the virtual address space).
  * Returns 0 on success and returns GNTST_* on error
@@ -608,14 +751,15 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
 	struct xenbus_map_node *node;
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-	};
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
 	unsigned int level;
+	int i;
+	bool leaked = false;
+	int err;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
-		if (node->area->addr == vaddr) {
+		if (node->pv.area->addr == vaddr) {
 			list_del(&node->next);
 			goto found;
 		}
@@ -630,22 +774,41 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device
*dev, void *vaddr)
 		return GNTST_bad_virt_addr;
 	}
 
-	op.handle = node->handle;
-	op.host_addr = arbitrary_virt_to_machine(
-		lookup_address((unsigned long)vaddr, &level)).maddr;
+	for (i = 0; i < node->nr_handles; i++) {
+		unsigned long addr;
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+		memset(&unmap[i], 0, sizeof(unmap[i]));
+		addr = (unsigned long)vaddr + (PAGE_SIZE * i);
+		unmap[i].host_addr = arbitrary_virt_to_machine(
+			lookup_address(addr, &level)).maddr;
+		unmap[i].dev_bus_addr = 0;
+		unmap[i].handle = node->handles[i];
+	}
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
 		BUG();
 
-	if (op.status == GNTST_okay)
-		free_vm_area(node->area);
+	err = GNTST_okay;
+	leaked = false;
+	for (i = 0; i < node->nr_handles; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			leaked = true;
+			xenbus_dev_error(dev, unmap[i].status,
+					 "unmapping page at handle %d error %d",
+					 node->handles[i], unmap[i].status);
+			err = unmap[i].status;
+			break;
+		}
+	}
+
+	if (!leaked)
+		free_vm_area(node->pv.area);
 	else
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 node->handle, op.status);
+		pr_alert("leaking VM area %p size %u page(s)",
+			 node->pv.area, node->nr_handles);
 
 	kfree(node);
-	return op.status;
+	return err;
 }
 
 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
@@ -653,10 +816,12 @@ static int xenbus_unmap_ring_vfree_hvm(struct
xenbus_device *dev, void *vaddr)
 	int rv;
 	struct xenbus_map_node *node;
 	void *addr;
+	unsigned long addrs[XENBUS_MAX_RING_PAGES];
+	int i;
 
 	spin_lock(&xenbus_valloc_lock);
 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
-		addr = pfn_to_kaddr(page_to_pfn(node->page));
+		addr = node->hvm.addr;
 		if (addr == vaddr) {
 			list_del(&node->next);
 			goto found;
@@ -672,12 +837,16 @@ static int xenbus_unmap_ring_vfree_hvm(struct
xenbus_device *dev, void *vaddr)
 		return GNTST_bad_virt_addr;
 	}
 
-	rv = xenbus_unmap_ring(dev, node->handle, addr);
+	for (i = 0; i < node->nr_handles; i++)
+		addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i]));
 
+	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
+			       addrs);
 	if (!rv)
-		free_xenballooned_pages(1, &node->page);
+		vunmap(vaddr);
 	else
-		WARN(1, "Leaking %p\n", vaddr);
+		WARN(1, "Leaking %p, size %u page(s)\n", vaddr,
+		     node->nr_handles);
 
 	kfree(node);
 	return rv;
@@ -686,29 +855,44 @@ static int xenbus_unmap_ring_vfree_hvm(struct
xenbus_device *dev, void *vaddr)
 /**
  * xenbus_unmap_ring
  * @dev: xenbus device
- * @handle: grant handle
- * @vaddr: addr to unmap
+ * @handles: grant handle array
+ * @nr_handles: number of handles in the array
+ * @vaddrs: addresses to unmap
  *
- * Unmap a page of memory in this domain that was imported from another domain.
+ * Unmap memory in this domain that was imported from another domain.
  * Returns 0 on success and returns GNTST_* on error
  * (see xen/include/interface/grant_table.h).
  */
 int xenbus_unmap_ring(struct xenbus_device *dev,
-		      grant_handle_t handle, void *vaddr)
+		      grant_handle_t *handles, unsigned int nr_handles,
+		      unsigned long *vaddrs)
 {
-	struct gnttab_unmap_grant_ref op;
+	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
+	int i;
+	int err;
 
-	gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle);
+	if (nr_handles > XENBUS_MAX_RING_PAGES)
+		return -EINVAL;
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+	for (i = 0; i < nr_handles; i++)
+		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
+				    GNTMAP_host_map, handles[i]);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
 		BUG();
 
-	if (op.status != GNTST_okay)
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 handle, op.status);
+	err = GNTST_okay;
+	for (i = 0; i < nr_handles; i++) {
+		if (unmap[i].status != GNTST_okay) {
+			xenbus_dev_error(dev, unmap[i].status,
+					 "unmapping page at handle %d error %d",
+					 handles[i], unmap[i].status);
+			err = unmap[i].status;
+			break;
+		}
+	}
 
-	return op.status;
+	return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
 
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 569c07f..40abaf6 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -46,6 +46,10 @@
 #include <xen/interface/io/xenbus.h>
 #include <xen/interface/io/xs_wire.h>
 
+#define XENBUS_MAX_RING_PAGE_ORDER 4
+#define XENBUS_MAX_RING_PAGES      (1U << XENBUS_MAX_RING_PAGE_ORDER)
+#define INVALID_GRANT_HANDLE       (~0U)
+
 /* Register callback to watch this node. */
 struct xenbus_watch
 {
@@ -196,15 +200,19 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct
xenbus_watch *watch,
 			 const char *pathfmt, ...);
 
 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state
new_state);
-int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-			   int gnt_ref, void **vaddr);
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-			   grant_handle_t *handle, void *vaddr);
+int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+		      unsigned int nr_pages, grant_ref_t *grefs);
+int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
+			   unsigned int nr_grefs, void **vaddr);
+int xenbus_map_ring(struct xenbus_device *dev,
+		    grant_ref_t *gnt_refs, unsigned int nr_grefs,
+		    grant_handle_t *handles, unsigned long *vaddrs,
+		    bool *leaked);
 
 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
 int xenbus_unmap_ring(struct xenbus_device *dev,
-		      grant_handle_t handle, void *vaddr);
+		      grant_handle_t *handles, unsigned int nr_handles,
+		      unsigned long *vaddrs);
 
 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
 int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
-- 
1.7.10.4
Wei Liu
2013-Nov-19  16:51 UTC
Re: [PATCH v5] xenbus_client: extend interface to suppurt multi-page ring
Ping? On Sun, Nov 10, 2013 at 05:21:29PM +0000, Wei Liu wrote:> Originally Xen PV drivers only use single-page ring to pass along > information. This might limit the throughput between frontend and > backend. > > The patch extends Xenbus driver to support multi-page ring, which in > general should improve throughput if ring is the bottleneck. Changes to > various frontend / backend to adapt to the new interface are also > included. > > Affected Xen drivers: > * blkfront/back > * netfront/back > * pcifront/back > * tpmfront > > The interface is documented, as before, in xenbus_client.c. > > Change in V2: > * allow ring has arbitrary number of pages <= XENBUS_MAX_RING_PAGES > > Change in V3: > * update function prototypes > * carefully deal with types of different sizes > > Change in V4: > * use PAGE_KERNEL instead of PAGE_KERNEL_IO to avoid breakage on Arm > > Change in V5: > * fix off-by-one error and other minor glitches spotted by Mathew Daley > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > Cc: Konrad Wilk <konrad.wilk@oracle.com> > Cc: David Vrabel <david.vrabel@citrix.com> > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
David Vrabel
2013-Nov-19  17:09 UTC
Re: [PATCH v5] xenbus_client: extend interface to suppurt multi-page ring
On 10/11/13 17:21, Wei Liu wrote:> Originally Xen PV drivers only use single-page ring to pass along > information. This might limit the throughput between frontend and > backend. > > The patch extends Xenbus driver to support multi-page ring, which in > general should improve throughput if ring is the bottleneck. Changes to > various frontend / backend to adapt to the new interface are also > included. > > Affected Xen drivers: > * blkfront/back > * netfront/back > * pcifront/back > * tpmfront > > The interface is documented, as before, in xenbus_client.c.It looks ok to me so I guess: Reviewed-by: David Vrabel <david.vrabel@citrix.com> But, I think this should wait until either: a) There''s an in kernel user that uses multipage rings or b) You demonstrate multi page rings have been tested with some sort of unit test. David
Wei Liu
2013-Nov-19  17:14 UTC
Re: [PATCH v5] xenbus_client: extend interface to suppurt multi-page ring
On Tue, Nov 19, 2013 at 05:09:29PM +0000, David Vrabel wrote:> On 10/11/13 17:21, Wei Liu wrote: > > Originally Xen PV drivers only use single-page ring to pass along > > information. This might limit the throughput between frontend and > > backend. > > > > The patch extends Xenbus driver to support multi-page ring, which in > > general should improve throughput if ring is the bottleneck. Changes to > > various frontend / backend to adapt to the new interface are also > > included. > > > > Affected Xen drivers: > > * blkfront/back > > * netfront/back > > * pcifront/back > > * tpmfront > > > > The interface is documented, as before, in xenbus_client.c. > > It looks ok to me so I guess: > > Reviewed-by: David Vrabel <david.vrabel@citrix.com> > > But, I think this should wait until either: > > a) There''s an in kernel user that uses multipage rings >The main point I picked this up is there seems to be people interested in this during Xen developer summit. None of the users is in tree though.> or > > b) You demonstrate multi page rings have been tested with some sort of > unit test. >Wait until I have time to develop multi page ring for network driver? But I''m not very keen on doing that as the improvement doesn''t look very impressive (if there''s any). Wei.> David
David Vrabel
2013-Nov-19  17:23 UTC
Re: [PATCH v5] xenbus_client: extend interface to suppurt multi-page ring
On 19/11/13 17:14, Wei Liu wrote:> On Tue, Nov 19, 2013 at 05:09:29PM +0000, David Vrabel wrote: >> On 10/11/13 17:21, Wei Liu wrote: >>> Originally Xen PV drivers only use single-page ring to pass along >>> information. This might limit the throughput between frontend and >>> backend. >>> >>> The patch extends Xenbus driver to support multi-page ring, which in >>> general should improve throughput if ring is the bottleneck. Changes to >>> various frontend / backend to adapt to the new interface are also >>> included. >>> >>> Affected Xen drivers: >>> * blkfront/back >>> * netfront/back >>> * pcifront/back >>> * tpmfront >>> >>> The interface is documented, as before, in xenbus_client.c. >> >> It looks ok to me so I guess: >> >> Reviewed-by: David Vrabel <david.vrabel@citrix.com> >> >> But, I think this should wait until either: >> >> a) There''s an in kernel user that uses multipage rings >> > > The main point I picked this up is there seems to be people interested > in this during Xen developer summit. None of the users is in tree > though. > >> or >> >> b) You demonstrate multi page rings have been tested with some sort of >> unit test. >> > > Wait until I have time to develop multi page ring for network driver?I said unit test. A test driver that creates a multipage ring, maps it and verifies that reads/writes to the two different mappings (i.e, ''front'' and ''back'') work as expected. This shouldn''t be a lot of work. We can''t take a new feature that hasn''t been tested at all. David