Santosh Jodh
2012-Mar-05 21:49 UTC
[PATCH 0001/001] xen: multi-page ring support for block devices
From: Santosh Jodh <santosh.jodh at citrix.com> Add support for multi page ring for block devices. The number of pages is configurable for blkback via module parameter. blkback reports max-ring-page-order to blkfront via xenstore. blkfront reports its supported ring-page-order to blkback via xenstore. blkfront reports multi page ring references via ring-refNN in xenstore. The change allows newer blkfront to work with older blkback and vice-versa. Based on original patch by Paul Durrant. Signed-off-by: Santosh Jodh <santosh.jodh at citrix.com> --- diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 0088bf6..72f2e18 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64; module_param_named(reqs, xen_blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); +/* Order of maximum shared ring size advertised to the front end. */ +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; + +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) + +static int set_max_ring_order(const char *buf, struct kernel_param *kp) +{ + int err; + unsigned long order; + + err = kstrtol(buf, 0, &order); + if (err || + order < 0 || + order > XENBUS_MAX_RING_ORDER) + return -EINVAL; + + if (xen_blkif_reqs < BLK_RING_SIZE(order)) + printk(KERN_WARNING "WARNING: " + "I/O request space (%d reqs) < ring order %ld, " + "consider increasing %s.reqs to >= %ld.", + xen_blkif_reqs, order, KBUILD_MODNAME, + roundup_pow_of_two(BLK_RING_SIZE(order))); + + xen_blkif_max_ring_order = order; + + return 0; +} + +module_param_call(max_ring_order, + set_max_ring_order, param_get_int, + &xen_blkif_max_ring_order, 0644); +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages."); + /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; module_param(log_stats, int, 0644); diff --git 
a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index d0ee7ed..5f33a1a 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -126,6 +126,8 @@ struct blkif_x86_64_response { int16_t status; /* BLKIF_RSP_??? */ }; +extern int xen_blkif_max_ring_order; + DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 24a2fb5..7a9d71d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) return blkif; } -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[], + unsigned int ring_order, unsigned int evtchn) { int err; @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); + err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order, + &blkif->blk_ring); if (err < 0) return err; @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, + PAGE_SIZE << ring_order); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, + PAGE_SIZE << ring_order); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; 
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, + PAGE_SIZE << ring_order); break; } default: @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", + "%u", xen_blkif_max_ring_order); + if (err) + goto fail; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -744,22 +753,80 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int ring_order; unsigned int evtchn; char protocol[64] = ""; int err; DPRINTK("%s", dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + printk(KERN_INFO "blkback: event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_order); + if (err != 1) { + DPRINTK("%s: using single page handshake", dev->otherend); + + ring_order = 0; + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%d", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + + printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]); + } else { + unsigned int i; + + if (ring_order > xen_blkif_max_ring_order) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, + "%s/ring-page-order too big", + dev->otherend); + return err; + } + + for (i = 0; i < (1u << ring_order); i++) { + char 
ring_ref_name[10]; + + snprintf(ring_ref_name, sizeof(ring_ref_name), + "ring-ref%u", i); + + err = xenbus_scanf(XBT_NIL, dev->otherend, + ring_ref_name, "%d", + &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, + "reading %s/%s", + dev->otherend, + ring_ref_name); + return err; + } + + printk(KERN_INFO "blkback: ring-ref%u %d\n", i, + ring_ref[i]); + } + } + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", "%63s", protocol, NULL); @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn"); return err; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2f22874..485813a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -57,6 +57,10 @@ #include <asm/xen/hypervisor.h> +static int xen_blkif_ring_order; +module_param_named(reqs, xen_blkif_ring_order, int, 0); +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages."); + enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, @@ -72,7 +76,8 @@ struct blk_shadow { static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) +#define BLK_MAX_RING_SIZE BLK_RING_SIZE(XENBUS_MAX_RING_ORDER) /* * We have one of these per vbd, whether ide, scsi or 'other'. 
They @@ -87,14 +92,15 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + int ring_order; struct blkif_front_ring ring; struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; @@ -111,9 +117,7 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) -#define GRANT_INVALID_REF 0 +#define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 #define PARTS_PER_EXT_DISK 256 @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_MAX_RING_SIZE); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { + int i; + /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); info->connected = suspend ? @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) flush_work_sync(&info->work); /* Free resources associated with old device channel. 
*/ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < (1 << info->ring_order); i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + + free_pages((unsigned long)info->ring.sring, info->ring_order); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; - } static void blkif_completion(struct blk_shadow *s) @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev, struct blkif_sring *sring; int err; - info->ring_ref = GRANT_INVALID_REF; - - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + info->ring_order); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order); sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order, + info->ring_ref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, info->ring_order); info->ring.sring = NULL; goto fail; } - info->ring_ref = err; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; + unsigned int ring_order; + int legacy_backend; + int i; int err; + for (i = 0; i < (1 << info->ring_order); i++) + info->ring_ref[i] = GRANT_INVALID_REF; + + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", + &ring_order); 
+ + legacy_backend = !(err == 1); + + if (legacy_backend) { + info->ring_order = 0; + } else { + info->ring_order = (ring_order <= xen_blkif_ring_order) ? + ring_order : + xen_blkif_ring_order; + } + /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); if (err) @@ -889,12 +916,35 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (legacy_backend) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%d", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + for (i = 0; i < (1 << info->ring_order); i++) { + char key[sizeof("ring-ref") + 2]; + + sprintf(key, "ring-ref%d", i); + + err = xenbus_printf(xbt, dev->nodename, + key, "%d", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", info->ring_order); + if (err) { + message = "writing ring-order"; + goto abort_transaction; + } } + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); if (err) { @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_MAX_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_blkback(dev, info); - if (err) { - kfree(info); - dev_set_drvdata(&dev->dev, NULL); - return err; - } - return 0; } @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_MAX_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev, switch (backend_state) { case XenbusStateInitialising: - case XenbusStateInitWait: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, case XenbusStateClosed: break; + case XenbusStateInitWait: + talk_to_blkback(dev, info); + break; + case XenbusStateConnected: blkfront_connect(info); break; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 94b79c3..f93b59a 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif); /* (Un)Map communication rings. */ void xen_netbk_unmap_frontend_rings(struct xenvif *vif); int xen_netbk_map_frontend_rings(struct xenvif *vif, - grant_ref_t tx_ring_ref, - grant_ref_t rx_ring_ref); + int tx_ring_ref, + int rx_ring_ref); /* (De)Register a xenvif with the netback backend. 
*/ void xen_netbk_add_xenvif(struct xenvif *vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 59effac..0b014cf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif) } int xen_netbk_map_frontend_rings(struct xenvif *vif, - grant_ref_t tx_ring_ref, - grant_ref_t rx_ring_ref) + int tx_ring_ref, + int rx_ring_ref) { void *addr; struct xen_netif_tx_sring *txs; @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, int err = -ENOMEM; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - tx_ring_ref, &addr); + &tx_ring_ref, 1, &addr); if (err) goto err; @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - rx_ring_ref, &addr); + &rx_ring_ref, 1, &addr); if (err) goto err; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 698b905..521a595 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref); if (err < 0) { free_page((unsigned long)txs); goto fail; } - info->tx_ring_ref = err; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; @@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref); if (err < 0) { free_page((unsigned long)rxs); goto fail; } - info->rx_ring_ref = err; err = 
xenbus_alloc_evtchn(dev, &info->evtchn); if (err) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 1620088..95109d8 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev) int err = 0; struct xenbus_transaction trans; - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref); if (err < 0) goto out; - pdev->gnt_ref = err; - err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); if (err) goto out; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 64b11f9..e0834cd 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", gnt_ref, remote_evtchn); - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error mapping other domain page in ours."); diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 566d2ad..3a14524 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -53,14 +53,16 @@ struct xenbus_map_node { struct vm_struct *area; /* PV */ struct page *page; /* HVM */ }; - grant_handle_t handle; + grant_handle_t handle[XENBUS_MAX_RING_PAGES]; + unsigned int nr_handles; }; static DEFINE_SPINLOCK(xenbus_valloc_lock); static LIST_HEAD(xenbus_valloc_pages); struct xenbus_ring_ops { - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts, + void **vaddr); int (*unmap)(struct xenbus_device *dev, void *vaddr); }; @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, 
int depth, int err, /** * xenbus_grant_ring * @dev: xenbus device - * @ring_mfn: mfn of ring to grant - - * Grant access to the given @ring_mfn to the peer of the given device. Return - * 0 on success, or -errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. + * @vaddr: starting virtual address of the ring + * @nr_pages: number of page to be granted + * @grefs: grant reference array to be filled in + * Grant access to the given @vaddr to the peer of the given device. + * Then fill in @grefs with grant references. Return 0 on success, or + * -errno on error. On error, the device will switch to + * XenbusStateClosing, and the first error will be saved in the store. */ -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + int nr_pages, int grefs[]) { - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); - if (err < 0) - xenbus_dev_fatal(dev, err, "granting access to ring page"); + int i; + int err; + + for (i = 0; i < nr_pages; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + err = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_mfn(addr), 0); + if (err < 0) { + xenbus_dev_fatal(dev, err, + "granting access to ring page"); + goto fail; + } + grefs[i] = err; + } + + return 0; + +fail: + for ( ; i >= 0; i--) + gnttab_end_foreign_access_ref(grefs[i], 0); return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); /** * xenbus_map_ring_valloc * @dev: xenbus device - * @gnt_ref: grant reference + * @gnt_ref: grant reference array + * @nr_grefs: number of grant reference * @vaddr: pointer to address to be filled out by mapping * * Based on Rusty Russell's skeleton driver's map_page. @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); * or -ENOMEM on error. 
If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], + int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_ref, vaddr); + return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr); } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, + struct xenbus_map_node *node); + static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + int gnt_ref[], int nr_grefs, void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map | GNTMAP_contains_pte, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; struct xenbus_map_node *node; struct vm_struct *area; - pte_t *pte; + pte_t *pte[XENBUS_MAX_RING_PAGES]; + int i; + int err = 0; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; *vaddr = NULL; @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE, &pte); + area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte); if (!area) { kfree(node); return -ENOMEM; } - op.host_addr = arbitrary_virt_to_machine(pte).maddr; + for (i = 0; i < nr_grefs; i++) { + op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte, + op[i].ref = gnt_ref[i], + op[i].dom = dev->otherend_id, + op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr; + }; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); - if (op.status != GNTST_okay) { - free_vm_area(area); - kfree(node); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; + node->nr_handles = nr_grefs; + node->area = area; + + for (i = 0; i < nr_grefs; i++) { + if (op[i].status != 
GNTST_okay) { + err = op[i].status; + node->handle[i] = INVALID_GRANT_HANDLE; + continue; + } + node->handle[i] = op[i].handle; } - node->handle = op.handle; - node->area = area; + if (err != 0) { + for (i = 0; i < nr_grefs; i++) + xenbus_dev_fatal(dev, op[i].status, + "mapping in shared page %d from domain %d", + gnt_ref[i], dev->otherend_id); + + __xenbus_unmap_ring_vfree_pv(dev, node); + + return err; + } spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, } static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + int gnt_ref[], int nr_grefs, void **vaddr) { struct xenbus_map_node *node; int err; void *addr; + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + *vaddr = NULL; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + err = alloc_xenballooned_pages(nr_grefs, &node->page, + false /* lowmem */); if (err) goto out_err; addr = pfn_to_kaddr(page_to_pfn(node->page)); - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr); if (err) goto out_err; @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, return 0; out_err: - free_xenballooned_pages(1, &node->page); + free_xenballooned_pages(nr_grefs, &node->page); kfree(node); return err; } @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, /** * xenbus_map_ring * @dev: xenbus device - * @gnt_ref: grant reference - * @handle: pointer to grant handle to be filled + * @gnt_ref: grant reference array + * @nr_grefs: number of grant references + * @handle: pointer to grant handle array to be filled, mind the size * @vaddr: address to be mapped to * - * Map a page of memory into this domain from another domain's grant table. 
+ * Map pages of memory into this domain from another domain's grant table. * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the page to the specified address. + * this yourself!). It only maps in the pages to the specified address. * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. + * XenbusStateClosing and the last error message will be saved in XenStore. */ -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, + grant_handle_t handle[], void *vaddr) { - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, - dev->otherend_id); + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; + int i; + int err = GNTST_okay; /* 0 */ + + for (i = 0; i < nr_grefs; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + gnttab_set_map_op(&op[i], (phys_addr_t)addr, + GNTMAP_host_map, gnt_ref[i], + dev->otherend_id); + } - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs)) BUG(); - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; + for (i = 0; i < nr_grefs; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_fatal(dev, err, + "mapping in shared page %d from domain %d", + gnt_ref[i], dev->otherend_id); + handle[i] = INVALID_GRANT_HANDLE; + } else + handle[i] = op[i].handle; + } - return op.status; + if (err != GNTST_okay) + xenbus_unmap_ring(dev, handle, nr_grefs, vaddr); + + return err; } EXPORT_SYMBOL_GPL(xenbus_map_ring); @@ 
-605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) } EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, + struct xenbus_map_node *node) +{ + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; + unsigned int level; + int i, j; + int err = GNTST_okay; + + j = 0; + for (i = 0; i < node->nr_handles; i++) { + unsigned long vaddr = (unsigned long)node->area->addr + + (PAGE_SIZE * i); + if (node->handle[i] != INVALID_GRANT_HANDLE) { + memset(&op[j], 0, sizeof(op[0])); + op[j].host_addr = arbitrary_virt_to_machine( + lookup_address(vaddr, &level)).maddr; + op[j].handle = node->handle[i]; + j++; + node->handle[i] = INVALID_GRANT_HANDLE; + } + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) + BUG(); + + node->nr_handles = 0; + + for (i = 0; i < j; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_error(dev, err, + "unmapping page %d at handle %d error %d", + i, op[i].handle, err); + } + } + + if (err == GNTST_okay) + free_vm_area(node->area); + + kfree(node); + + return err; +} + static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; - unsigned int level; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) if (!node) { xenbus_dev_error(dev, -ENOENT, - "can't find mapped virtual address %p", vaddr); + "can't find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = node->handle; - op.host_addr = arbitrary_virt_to_machine( - lookup_address((unsigned long)vaddr, &level)).maddr; - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); - - if (op.status == GNTST_okay) - free_vm_area(node->area); - else - 
xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - node->handle, op.status); - - kfree(node); - return op.status; + return __xenbus_unmap_ring_vfree_pv(dev, node); } static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) { int rv; struct xenbus_map_node *node; - void *addr; + void *addr = NULL; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) if (!node) { xenbus_dev_error(dev, -ENOENT, - "can't find mapped virtual address %p", vaddr); + "can't find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - rv = xenbus_unmap_ring(dev, node->handle, addr); + rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr); if (!rv) - free_xenballooned_pages(1, &node->page); + free_xenballooned_pages(node->nr_handles, &node->page); else WARN(1, "Leaking %p\n", vaddr); @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) * xenbus_unmap_ring * @dev: xenbus device * @handle: grant handle + * @nr_handles: number of grant handle * @vaddr: addr to unmap * * Unmap a page of memory in this domain that was imported from another domain. @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) * (see xen/include/interface/grant_table.h). 
*/ int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) + grant_handle_t handle[], int nr_handles, + void *vaddr) { - struct gnttab_unmap_grant_ref op; - - gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; + int i, j; + int err = GNTST_okay; + + j = 0; + for (i = 0; i < nr_handles; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + if (handle[i] != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr, + GNTMAP_host_map, handle[i]); + handle[i] = INVALID_GRANT_HANDLE; + } + } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) BUG(); - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); + for (i = 0; i < j; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_error(dev, err, + "unmapping page at handle %d error %d", + handle[i], err); + } + } - return op.status; + return err; } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3864967..62b92d2 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void) return err; } +extern void xenbus_ring_ops_init(void); static int __init xenbus_init(void) { int err = 0; @@ -767,6 +768,8 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif + xenbus_ring_ops_init(); + out_error: return err; } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index e8c599b..cdbd948 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum 
xenbus_state new_state); -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr); + +#define XENBUS_MAX_RING_ORDER 2 +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER) + +#define INVALID_GRANT_HANDLE (~0U) + +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + int nr_pages, int grefs[]); +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], + int nr_grefs, void **vaddr); +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, + grant_handle_t handle[], void *vaddr); int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr); + grant_handle_t handle[], int nr_handles, + void *vaddr); int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
Santosh Jodh
2012-Mar-05 21:49 UTC
[PATCH 0001/001] xen: multi page ring support for block devices
From: Santosh Jodh <santosh.jodh@citrix.com> Add support for multi page ring for block devices. The number of pages is configurable for blkback via module parameter. blkback reports max-ring-page-order to blkfront via xenstore. blkfront reports its supported ring-page-order to blkback via xenstore. blkfront reports multi page ring references via ring-refNN in xenstore. The change allows newer blkfront to work with older blkback and vice-versa. Based on original patch by Paul Durrant. Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com> --- diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 0088bf6..72f2e18 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64; module_param_named(reqs, xen_blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); +/* Order of maximum shared ring size advertised to the front end. */ +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; + +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) + +static int set_max_ring_order(const char *buf, struct kernel_param *kp) +{ + int err; + unsigned long order; + + err = kstrtol(buf, 0, &order); + if (err || + order < 0 || + order > XENBUS_MAX_RING_ORDER) + return -EINVAL; + + if (xen_blkif_reqs < BLK_RING_SIZE(order)) + printk(KERN_WARNING "WARNING: " + "I/O request space (%d reqs) < ring order %ld, " + "consider increasing %s.reqs to >= %ld.", + xen_blkif_reqs, order, KBUILD_MODNAME, + roundup_pow_of_two(BLK_RING_SIZE(order))); + + xen_blkif_max_ring_order = order; + + return 0; +} + +module_param_call(max_ring_order, + set_max_ring_order, param_get_int, + &xen_blkif_max_ring_order, 0644); +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages."); + /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; module_param(log_stats, int, 0644); diff --git 
a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index d0ee7ed..5f33a1a 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -126,6 +126,8 @@ struct blkif_x86_64_response { int16_t status; /* BLKIF_RSP_??? */ }; +extern int xen_blkif_max_ring_order; + DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 24a2fb5..7a9d71d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) return blkif; } -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[], + unsigned int ring_order, unsigned int evtchn) { int err; @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); + err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order, + &blkif->blk_ring); if (err < 0) return err; @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, + PAGE_SIZE << ring_order); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, + PAGE_SIZE << ring_order); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; 
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, + PAGE_SIZE << ring_order); break; } default: @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", + "%u", xen_blkif_max_ring_order); + if (err) + goto fail; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -744,22 +753,80 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int ring_order; unsigned int evtchn; char protocol[64] = ""; int err; DPRINTK("%s", dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + printk(KERN_INFO "blkback: event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_order); + if (err != 1) { + DPRINTK("%s: using single page handshake", dev->otherend); + + ring_order = 0; + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%d", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + + printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]); + } else { + unsigned int i; + + if (ring_order > xen_blkif_max_ring_order) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, + "%s/ring-page-order too big", + dev->otherend); + return err; + } + + for (i = 0; i < (1u << ring_order); i++) { + char 
ring_ref_name[10]; + + snprintf(ring_ref_name, sizeof(ring_ref_name), + "ring-ref%u", i); + + err = xenbus_scanf(XBT_NIL, dev->otherend, + ring_ref_name, "%d", + &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, + "reading %s/%s", + dev->otherend, + ring_ref_name); + return err; + } + + printk(KERN_INFO "blkback: ring-ref%u %d\n", i, + ring_ref[i]); + } + } + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", "%63s", protocol, NULL); @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn"); return err; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2f22874..485813a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -57,6 +57,10 @@ #include <asm/xen/hypervisor.h> +static int xen_blkif_ring_order; +module_param_named(reqs, xen_blkif_ring_order, int, 0); +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages."); + enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, @@ -72,7 +76,8 @@ struct blk_shadow { static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) +#define BLK_MAX_RING_SIZE BLK_RING_SIZE(XENBUS_MAX_RING_ORDER) /* * We have one of these per vbd, whether ide, scsi or ''other''. 
They @@ -87,14 +92,15 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + int ring_order; struct blkif_front_ring ring; struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; @@ -111,9 +117,7 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) -#define GRANT_INVALID_REF 0 +#define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 #define PARTS_PER_EXT_DISK 256 @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_MAX_RING_SIZE); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { + int i; + /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); info->connected = suspend ? @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) flush_work_sync(&info->work); /* Free resources associated with old device channel. 
*/ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < (1 << info->ring_order); i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + + free_pages((unsigned long)info->ring.sring, info->ring_order); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; - } static void blkif_completion(struct blk_shadow *s) @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev, struct blkif_sring *sring; int err; - info->ring_ref = GRANT_INVALID_REF; - - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + info->ring_order); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order); sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order, + info->ring_ref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, info->ring_order); info->ring.sring = NULL; goto fail; } - info->ring_ref = err; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; + unsigned int ring_order; + int legacy_backend; + int i; int err; + for (i = 0; i < (1 << info->ring_order); i++) + info->ring_ref[i] = GRANT_INVALID_REF; + + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", + &ring_order); 
+ + legacy_backend = !(err == 1); + + if (legacy_backend) { + info->ring_order = 0; + } else { + info->ring_order = (ring_order <= xen_blkif_ring_order) ? + ring_order : + xen_blkif_ring_order; + } + /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); if (err) @@ -889,12 +916,35 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (legacy_backend) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%d", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + for (i = 0; i < (1 << info->ring_order); i++) { + char key[sizeof("ring-ref") + 2]; + + sprintf(key, "ring-ref%d", i); + + err = xenbus_printf(xbt, dev->nodename, + key, "%d", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", info->ring_order); + if (err) { + message = "writing ring-order"; + goto abort_transaction; + } } + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); if (err) { @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_MAX_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename, ''/'')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_blkback(dev, info); - if (err) { - kfree(info); - dev_set_drvdata(&dev->dev, NULL); - return err; - } - return 0; } @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_MAX_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev, switch (backend_state) { case XenbusStateInitialising: - case XenbusStateInitWait: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, case XenbusStateClosed: break; + case XenbusStateInitWait: + talk_to_blkback(dev, info); + break; + case XenbusStateConnected: blkfront_connect(info); break; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 94b79c3..f93b59a 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif); /* (Un)Map communication rings. */ void xen_netbk_unmap_frontend_rings(struct xenvif *vif); int xen_netbk_map_frontend_rings(struct xenvif *vif, - grant_ref_t tx_ring_ref, - grant_ref_t rx_ring_ref); + int tx_ring_ref, + int rx_ring_ref); /* (De)Register a xenvif with the netback backend. 
*/ void xen_netbk_add_xenvif(struct xenvif *vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 59effac..0b014cf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif) } int xen_netbk_map_frontend_rings(struct xenvif *vif, - grant_ref_t tx_ring_ref, - grant_ref_t rx_ring_ref) + int tx_ring_ref, + int rx_ring_ref) { void *addr; struct xen_netif_tx_sring *txs; @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, int err = -ENOMEM; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - tx_ring_ref, &addr); + &tx_ring_ref, 1, &addr); if (err) goto err; @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - rx_ring_ref, &addr); + &rx_ring_ref, 1, &addr); if (err) goto err; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 698b905..521a595 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref); if (err < 0) { free_page((unsigned long)txs); goto fail; } - info->tx_ring_ref = err; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; @@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref); if (err < 0) { free_page((unsigned long)rxs); goto fail; } - info->rx_ring_ref = err; err = 
xenbus_alloc_evtchn(dev, &info->evtchn); if (err) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 1620088..95109d8 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev) int err = 0; struct xenbus_transaction trans; - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref); if (err < 0) goto out; - pdev->gnt_ref = err; - err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); if (err) goto out; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 64b11f9..e0834cd 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", gnt_ref, remote_evtchn); - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error mapping other domain page in ours."); diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 566d2ad..3a14524 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -53,14 +53,16 @@ struct xenbus_map_node { struct vm_struct *area; /* PV */ struct page *page; /* HVM */ }; - grant_handle_t handle; + grant_handle_t handle[XENBUS_MAX_RING_PAGES]; + unsigned int nr_handles; }; static DEFINE_SPINLOCK(xenbus_valloc_lock); static LIST_HEAD(xenbus_valloc_pages); struct xenbus_ring_ops { - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts, + void **vaddr); int (*unmap)(struct xenbus_device *dev, void *vaddr); }; @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, 
int depth, int err, /** * xenbus_grant_ring * @dev: xenbus device - * @ring_mfn: mfn of ring to grant - - * Grant access to the given @ring_mfn to the peer of the given device. Return - * 0 on success, or -errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. + * @vaddr: starting virtual address of the ring + * @nr_pages: number of page to be granted + * @grefs: grant reference array to be filled in + * Grant access to the given @vaddr to the peer of the given device. + * Then fill in @grefs with grant references. Return 0 on success, or + * -errno on error. On error, the device will switch to + * XenbusStateClosing, and the first error will be saved in the store. */ -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + int nr_pages, int grefs[]) { - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); - if (err < 0) - xenbus_dev_fatal(dev, err, "granting access to ring page"); + int i; + int err; + + for (i = 0; i < nr_pages; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + err = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_mfn(addr), 0); + if (err < 0) { + xenbus_dev_fatal(dev, err, + "granting access to ring page"); + goto fail; + } + grefs[i] = err; + } + + return 0; + +fail: + for ( ; i >= 0; i--) + gnttab_end_foreign_access_ref(grefs[i], 0); return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); /** * xenbus_map_ring_valloc * @dev: xenbus device - * @gnt_ref: grant reference + * @gnt_ref: grant reference array + * @nr_grefs: number of grant reference * @vaddr: pointer to address to be filled out by mapping * * Based on Rusty Russell''s skeleton driver''s map_page. @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); * or -ENOMEM on error. 
If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], + int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_ref, vaddr); + return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr); } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, + struct xenbus_map_node *node); + static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + int gnt_ref[], int nr_grefs, void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map | GNTMAP_contains_pte, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; struct xenbus_map_node *node; struct vm_struct *area; - pte_t *pte; + pte_t *pte[XENBUS_MAX_RING_PAGES]; + int i; + int err = 0; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; *vaddr = NULL; @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE, &pte); + area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte); if (!area) { kfree(node); return -ENOMEM; } - op.host_addr = arbitrary_virt_to_machine(pte).maddr; + for (i = 0; i < nr_grefs; i++) { + op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte, + op[i].ref = gnt_ref[i], + op[i].dom = dev->otherend_id, + op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr; + }; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); - if (op.status != GNTST_okay) { - free_vm_area(area); - kfree(node); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; + node->nr_handles = nr_grefs; + node->area = area; + + for (i = 0; i < nr_grefs; i++) { + if (op[i].status != 
GNTST_okay) { + err = op[i].status; + node->handle[i] = INVALID_GRANT_HANDLE; + continue; + } + node->handle[i] = op[i].handle; } - node->handle = op.handle; - node->area = area; + if (err != 0) { + for (i = 0; i < nr_grefs; i++) + xenbus_dev_fatal(dev, op[i].status, + "mapping in shared page %d from domain %d", + gnt_ref[i], dev->otherend_id); + + __xenbus_unmap_ring_vfree_pv(dev, node); + + return err; + } spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, } static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + int gnt_ref[], int nr_grefs, void **vaddr) { struct xenbus_map_node *node; int err; void *addr; + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + *vaddr = NULL; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + err = alloc_xenballooned_pages(nr_grefs, &node->page, + false /* lowmem */); if (err) goto out_err; addr = pfn_to_kaddr(page_to_pfn(node->page)); - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr); if (err) goto out_err; @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, return 0; out_err: - free_xenballooned_pages(1, &node->page); + free_xenballooned_pages(nr_grefs, &node->page); kfree(node); return err; } @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, /** * xenbus_map_ring * @dev: xenbus device - * @gnt_ref: grant reference - * @handle: pointer to grant handle to be filled + * @gnt_ref: grant reference array + * @nr_grefs: number of grant references + * @handle: pointer to grant handle array to be filled, mind the size * @vaddr: address to be mapped to * - * Map a page of memory into this domain from another domain''s grant table. 
+ * Map pages of memory into this domain from another domain''s grant table. * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the page to the specified address. + * this yourself!). It only maps in the pages to the specified address. * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. + * XenbusStateClosing and the last error message will be saved in XenStore. */ -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, + grant_handle_t handle[], void *vaddr) { - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, - dev->otherend_id); + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; + int i; + int err = GNTST_okay; /* 0 */ + + for (i = 0; i < nr_grefs; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + gnttab_set_map_op(&op[i], (phys_addr_t)addr, + GNTMAP_host_map, gnt_ref[i], + dev->otherend_id); + } - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs)) BUG(); - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; + for (i = 0; i < nr_grefs; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_fatal(dev, err, + "mapping in shared page %d from domain %d", + gnt_ref[i], dev->otherend_id); + handle[i] = INVALID_GRANT_HANDLE; + } else + handle[i] = op[i].handle; + } - return op.status; + if (err != GNTST_okay) + xenbus_unmap_ring(dev, handle, nr_grefs, vaddr); + + return err; } EXPORT_SYMBOL_GPL(xenbus_map_ring); @@ 
-605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) } EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, + struct xenbus_map_node *node) +{ + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; + unsigned int level; + int i, j; + int err = GNTST_okay; + + j = 0; + for (i = 0; i < node->nr_handles; i++) { + unsigned long vaddr = (unsigned long)node->area->addr + + (PAGE_SIZE * i); + if (node->handle[i] != INVALID_GRANT_HANDLE) { + memset(&op[j], 0, sizeof(op[0])); + op[j].host_addr = arbitrary_virt_to_machine( + lookup_address(vaddr, &level)).maddr; + op[j].handle = node->handle[i]; + j++; + node->handle[i] = INVALID_GRANT_HANDLE; + } + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) + BUG(); + + node->nr_handles = 0; + + for (i = 0; i < j; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_error(dev, err, + "unmapping page %d at handle %d error %d", + i, op[i].handle, err); + } + } + + if (err == GNTST_okay) + free_vm_area(node->area); + + kfree(node); + + return err; +} + static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; - unsigned int level; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) if (!node) { xenbus_dev_error(dev, -ENOENT, - "can''t find mapped virtual address %p", vaddr); + "can''t find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = node->handle; - op.host_addr = arbitrary_virt_to_machine( - lookup_address((unsigned long)vaddr, &level)).maddr; - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); - - if (op.status == GNTST_okay) - free_vm_area(node->area); - else - 
xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - node->handle, op.status); - - kfree(node); - return op.status; + return __xenbus_unmap_ring_vfree_pv(dev, node); } static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) { int rv; struct xenbus_map_node *node; - void *addr; + void *addr = NULL; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) if (!node) { xenbus_dev_error(dev, -ENOENT, - "can''t find mapped virtual address %p", vaddr); + "can''t find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - rv = xenbus_unmap_ring(dev, node->handle, addr); + rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr); if (!rv) - free_xenballooned_pages(1, &node->page); + free_xenballooned_pages(node->nr_handles, &node->page); else WARN(1, "Leaking %p\n", vaddr); @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) * xenbus_unmap_ring * @dev: xenbus device * @handle: grant handle + * @nr_handles: number of grant handle * @vaddr: addr to unmap * * Unmap a page of memory in this domain that was imported from another domain. @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) * (see xen/include/interface/grant_table.h). 
*/ int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) + grant_handle_t handle[], int nr_handles, + void *vaddr) { - struct gnttab_unmap_grant_ref op; - - gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; + int i, j; + int err = GNTST_okay; + + j = 0; + for (i = 0; i < nr_handles; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + if (handle[i] != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr, + GNTMAP_host_map, handle[i]); + handle[i] = INVALID_GRANT_HANDLE; + } + } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) BUG(); - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); + for (i = 0; i < j; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_error(dev, err, + "unmapping page at handle %d error %d", + handle[i], err); + } + } - return op.status; + return err; } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3864967..62b92d2 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void) return err; } +extern void xenbus_ring_ops_init(void); static int __init xenbus_init(void) { int err = 0; @@ -767,6 +768,8 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif + xenbus_ring_ops_init(); + out_error: return err; } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index e8c599b..cdbd948 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum 
xenbus_state new_state); -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr); + +#define XENBUS_MAX_RING_ORDER 2 +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER) + +#define INVALID_GRANT_HANDLE (~0U) + +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + int nr_pages, int grefs[]); +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], + int nr_grefs, void **vaddr); +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, + grant_handle_t handle[], void *vaddr); int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr); + grant_handle_t handle[], int nr_handles, + void *vaddr); int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
Rusty Russell
2012-Mar-06 02:42 UTC
[PATCH 0001/001] xen: multi page ring support for block devices
On Mon, 5 Mar 2012 13:49:07 -0800, Santosh Jodh <Santosh.Jodh at citrix.com> wrote: > +/* Order of maximum shared ring size advertised to the front end. */ > +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; > + > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > + > +static int set_max_ring_order(const char *buf, struct kernel_param *kp) > +{ > + int err; > + unsigned long order; > + > + err = kstrtol(buf, 0, &order); > + if (err || > + order < 0 || > + order > XENBUS_MAX_RING_ORDER) > + return -EINVAL; Hmm, order can't be < 0, since it's unsigned. So did you mean kstrtoull? And I think returning err is cleaner (it's -EINVAL for malformed strings, -ERANGE for ones too big). > + if (xen_blkif_reqs < BLK_RING_SIZE(order)) > + printk(KERN_WARNING "WARNING: " > + "I/O request space (%d reqs) < ring order %ld, " > + "consider increasing %s.reqs to >= %ld.", > + xen_blkif_reqs, order, KBUILD_MODNAME, > + roundup_pow_of_two(BLK_RING_SIZE(order))); This message doesn't mention the module name or parameter name anywhere. Think of the poor sysadmins! Thanks, Rusty. -- How could I marry someone with more hair than me? http://baldalex.org
Rusty Russell
2012-Mar-06 02:42 UTC
Re: [PATCH 0001/001] xen: multi page ring support for block devices
On Mon, 5 Mar 2012 13:49:07 -0800, Santosh Jodh <Santosh.Jodh@citrix.com> wrote: > +/* Order of maximum shared ring size advertised to the front end. */ > +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; > + > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > + > +static int set_max_ring_order(const char *buf, struct kernel_param *kp) > +{ > + int err; > + unsigned long order; > + > + err = kstrtol(buf, 0, &order); > + if (err || > + order < 0 || > + order > XENBUS_MAX_RING_ORDER) > + return -EINVAL; Hmm, order can't be < 0, since it's unsigned. So did you mean kstrtoull? And I think returning err is cleaner (it's -EINVAL for malformed strings, -ERANGE for ones too big). > + if (xen_blkif_reqs < BLK_RING_SIZE(order)) > + printk(KERN_WARNING "WARNING: " > + "I/O request space (%d reqs) < ring order %ld, " > + "consider increasing %s.reqs to >= %ld.", > + xen_blkif_reqs, order, KBUILD_MODNAME, > + roundup_pow_of_two(BLK_RING_SIZE(order))); This message doesn't mention the module name or parameter name anywhere. Think of the poor sysadmins! Thanks, Rusty. -- How could I marry someone with more hair than me? http://baldalex.org
Santosh Jodh
2012-Mar-06 06:21 UTC
Re: [PATCH 0001/001] xen: multi page ring support for block devices
Great feedback. I removed unsigned for the first, changed the error code and added module param name in the printk. Please see latest patch: --- diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 0088bf6..cc238e7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -60,6 +60,40 @@ static int xen_blkif_reqs = 64; module_param_named(reqs, xen_blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); +/* Order of maximum shared ring size advertised to the front end. */ +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; + +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) + +static int set_max_ring_order(const char *buf, struct kernel_param *kp) +{ + int err; + long order; + + err = kstrtol(buf, 0, &order); + if (err || + order < 0 || + order > XENBUS_MAX_RING_ORDER) + return -ERANGE; + + if (xen_blkif_reqs < BLK_RING_SIZE(order)) + printk(KERN_WARNING "WARNING: " + "I/O request space (%d reqs) < ring order %ld " + "set by module parameter %s.max_ring_order, " + "consider increasing %s.reqs to >= %ld.", + xen_blkif_reqs, order, KBUILD_MODNAME, KBUILD_MODNAME, + roundup_pow_of_two(BLK_RING_SIZE(order))); + + xen_blkif_max_ring_order = order; + + return 0; +} + +module_param_call(max_ring_order, + set_max_ring_order, param_get_int, + &xen_blkif_max_ring_order, 0644); +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages."); + /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; module_param(log_stats, int, 0644); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index d0ee7ed..5f33a1a 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -126,6 +126,8 @@ struct blkif_x86_64_response { int16_t status; /* BLKIF_RSP_??? 
*/ }; +extern int xen_blkif_max_ring_order; + DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 24a2fb5..7a9d71d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) return blkif; } -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[], + unsigned int ring_order, unsigned int evtchn) { int err; @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); + err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order, + &blkif->blk_ring); if (err < 0) return err; @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, + PAGE_SIZE << ring_order); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, + PAGE_SIZE << ring_order); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, + PAGE_SIZE << ring_order); break; } default: @@ -497,6 +501,11 @@ static int 
xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", + "%u", xen_blkif_max_ring_order); + if (err) + goto fail; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -744,22 +753,80 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int ring_order; unsigned int evtchn; char protocol[64] = ""; int err; DPRINTK("%s", dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + printk(KERN_INFO "blkback: event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_order); + if (err != 1) { + DPRINTK("%s: using single page handshake", dev->otherend); + + ring_order = 0; + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%d", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + + printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]); + } else { + unsigned int i; + + if (ring_order > xen_blkif_max_ring_order) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, + "%s/ring-page-order too big", + dev->otherend); + return err; + } + + for (i = 0; i < (1u << ring_order); i++) { + char ring_ref_name[10]; + + snprintf(ring_ref_name, sizeof(ring_ref_name), + "ring-ref%u", i); + + err = xenbus_scanf(XBT_NIL, dev->otherend, + ring_ref_name, "%d", + &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + + xenbus_dev_fatal(dev, err, + "reading %s/%s", + 
dev->otherend, + ring_ref_name); + return err; + } + + printk(KERN_INFO "blkback: ring-ref%u %d\n", i, + ring_ref[i]); + } + } + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", "%63s", protocol, NULL); @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn"); return err; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2f22874..485813a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -57,6 +57,10 @@ #include <asm/xen/hypervisor.h> +static int xen_blkif_ring_order; +module_param_named(reqs, xen_blkif_ring_order, int, 0); +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages."); + enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, @@ -72,7 +76,8 @@ struct blk_shadow { static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) +#define BLK_MAX_RING_SIZE BLK_RING_SIZE(XENBUS_MAX_RING_ORDER) /* * We have one of these per vbd, whether ide, scsi or ''other''. 
They @@ -87,14 +92,15 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + int ring_order; struct blkif_front_ring ring; struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; @@ -111,9 +117,7 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) -#define GRANT_INVALID_REF 0 +#define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 #define PARTS_PER_EXT_DISK 256 @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_MAX_RING_SIZE); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { + int i; + /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); info->connected = suspend ? @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) flush_work_sync(&info->work); /* Free resources associated with old device channel. 
*/ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < (1 << info->ring_order); i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + + free_pages((unsigned long)info->ring.sring, info->ring_order); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; - } static void blkif_completion(struct blk_shadow *s) @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev, struct blkif_sring *sring; int err; - info->ring_ref = GRANT_INVALID_REF; - - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + info->ring_order); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order); sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order, + info->ring_ref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, info->ring_order); info->ring.sring = NULL; goto fail; } - info->ring_ref = err; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; + unsigned int ring_order; + int legacy_backend; + int i; int err; + for (i = 0; i < (1 << info->ring_order); i++) + info->ring_ref[i] = GRANT_INVALID_REF; + + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", + &ring_order); 
+ + legacy_backend = !(err == 1); + + if (legacy_backend) { + info->ring_order = 0; + } else { + info->ring_order = (ring_order <= xen_blkif_ring_order) ? + ring_order : + xen_blkif_ring_order; + } + /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); if (err) @@ -889,12 +916,35 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (legacy_backend) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%d", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + for (i = 0; i < (1 << info->ring_order); i++) { + char key[sizeof("ring-ref") + 2]; + + sprintf(key, "ring-ref%d", i); + + err = xenbus_printf(xbt, dev->nodename, + key, "%d", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", info->ring_order); + if (err) { + message = "writing ring-order"; + goto abort_transaction; + } } + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); if (err) { @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_MAX_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename, ''/'')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_blkback(dev, info); - if (err) { - kfree(info); - dev_set_drvdata(&dev->dev, NULL); - return err; - } - return 0; } @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_MAX_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev, switch (backend_state) { case XenbusStateInitialising: - case XenbusStateInitWait: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, case XenbusStateClosed: break; + case XenbusStateInitWait: + talk_to_blkback(dev, info); + break; + case XenbusStateConnected: blkfront_connect(info); break; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 94b79c3..f93b59a 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif); /* (Un)Map communication rings. */ void xen_netbk_unmap_frontend_rings(struct xenvif *vif); int xen_netbk_map_frontend_rings(struct xenvif *vif, - grant_ref_t tx_ring_ref, - grant_ref_t rx_ring_ref); + int tx_ring_ref, + int rx_ring_ref); /* (De)Register a xenvif with the netback backend. 
*/ void xen_netbk_add_xenvif(struct xenvif *vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 59effac..0b014cf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif) } int xen_netbk_map_frontend_rings(struct xenvif *vif, - grant_ref_t tx_ring_ref, - grant_ref_t rx_ring_ref) + int tx_ring_ref, + int rx_ring_ref) { void *addr; struct xen_netif_tx_sring *txs; @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, int err = -ENOMEM; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - tx_ring_ref, &addr); + &tx_ring_ref, 1, &addr); if (err) goto err; @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - rx_ring_ref, &addr); + &rx_ring_ref, 1, &addr); if (err) goto err; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 698b905..521a595 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref); if (err < 0) { free_page((unsigned long)txs); goto fail; } - info->tx_ring_ref = err; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; @@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref); if (err < 0) { free_page((unsigned long)rxs); goto fail; } - info->rx_ring_ref = err; err = 
xenbus_alloc_evtchn(dev, &info->evtchn); if (err) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 1620088..95109d8 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev) int err = 0; struct xenbus_transaction trans; - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref); if (err < 0) goto out; - pdev->gnt_ref = err; - err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); if (err) goto out; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 64b11f9..e0834cd 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", gnt_ref, remote_evtchn); - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error mapping other domain page in ours."); diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 566d2ad..3a14524 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -53,14 +53,16 @@ struct xenbus_map_node { struct vm_struct *area; /* PV */ struct page *page; /* HVM */ }; - grant_handle_t handle; + grant_handle_t handle[XENBUS_MAX_RING_PAGES]; + unsigned int nr_handles; }; static DEFINE_SPINLOCK(xenbus_valloc_lock); static LIST_HEAD(xenbus_valloc_pages); struct xenbus_ring_ops { - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts, + void **vaddr); int (*unmap)(struct xenbus_device *dev, void *vaddr); }; @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, 
int depth, int err, /** * xenbus_grant_ring * @dev: xenbus device - * @ring_mfn: mfn of ring to grant - - * Grant access to the given @ring_mfn to the peer of the given device. Return - * 0 on success, or -errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. + * @vaddr: starting virtual address of the ring + * @nr_pages: number of page to be granted + * @grefs: grant reference array to be filled in + * Grant access to the given @vaddr to the peer of the given device. + * Then fill in @grefs with grant references. Return 0 on success, or + * -errno on error. On error, the device will switch to + * XenbusStateClosing, and the first error will be saved in the store. */ -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + int nr_pages, int grefs[]) { - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); - if (err < 0) - xenbus_dev_fatal(dev, err, "granting access to ring page"); + int i; + int err; + + for (i = 0; i < nr_pages; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + err = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_mfn(addr), 0); + if (err < 0) { + xenbus_dev_fatal(dev, err, + "granting access to ring page"); + goto fail; + } + grefs[i] = err; + } + + return 0; + +fail: + for ( ; i >= 0; i--) + gnttab_end_foreign_access_ref(grefs[i], 0); return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); /** * xenbus_map_ring_valloc * @dev: xenbus device - * @gnt_ref: grant reference + * @gnt_ref: grant reference array + * @nr_grefs: number of grant reference * @vaddr: pointer to address to be filled out by mapping * * Based on Rusty Russell''s skeleton driver''s map_page. @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); * or -ENOMEM on error. 
If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], + int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_ref, vaddr); + return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr); } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, + struct xenbus_map_node *node); + static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + int gnt_ref[], int nr_grefs, void **vaddr) { - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map | GNTMAP_contains_pte, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; struct xenbus_map_node *node; struct vm_struct *area; - pte_t *pte; + pte_t *pte[XENBUS_MAX_RING_PAGES]; + int i; + int err = 0; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; *vaddr = NULL; @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE, &pte); + area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte); if (!area) { kfree(node); return -ENOMEM; } - op.host_addr = arbitrary_virt_to_machine(pte).maddr; + for (i = 0; i < nr_grefs; i++) { + op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte, + op[i].ref = gnt_ref[i], + op[i].dom = dev->otherend_id, + op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr; + }; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); - if (op.status != GNTST_okay) { - free_vm_area(area); - kfree(node); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; + node->nr_handles = nr_grefs; + node->area = area; + + for (i = 0; i < nr_grefs; i++) { + if (op[i].status != 
GNTST_okay) { + err = op[i].status; + node->handle[i] = INVALID_GRANT_HANDLE; + continue; + } + node->handle[i] = op[i].handle; } - node->handle = op.handle; - node->area = area; + if (err != 0) { + for (i = 0; i < nr_grefs; i++) + xenbus_dev_fatal(dev, op[i].status, + "mapping in shared page %d from domain %d", + gnt_ref[i], dev->otherend_id); + + __xenbus_unmap_ring_vfree_pv(dev, node); + + return err; + } spin_lock(&xenbus_valloc_lock); list_add(&node->next, &xenbus_valloc_pages); @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, } static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, - int gnt_ref, void **vaddr) + int gnt_ref[], int nr_grefs, void **vaddr) { struct xenbus_map_node *node; int err; void *addr; + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + *vaddr = NULL; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + err = alloc_xenballooned_pages(nr_grefs, &node->page, + false /* lowmem */); if (err) goto out_err; addr = pfn_to_kaddr(page_to_pfn(node->page)); - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr); if (err) goto out_err; @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, return 0; out_err: - free_xenballooned_pages(1, &node->page); + free_xenballooned_pages(nr_grefs, &node->page); kfree(node); return err; } @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, /** * xenbus_map_ring * @dev: xenbus device - * @gnt_ref: grant reference - * @handle: pointer to grant handle to be filled + * @gnt_ref: grant reference array + * @nr_grefs: number of grant references + * @handle: pointer to grant handle array to be filled, mind the size * @vaddr: address to be mapped to * - * Map a page of memory into this domain from another domain''s grant table. 
+ * Map pages of memory into this domain from another domain''s grant table. * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the page to the specified address. + * this yourself!). It only maps in the pages to the specified address. * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. + * XenbusStateClosing and the last error message will be saved in XenStore. */ -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, + grant_handle_t handle[], void *vaddr) { - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, - dev->otherend_id); + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; + int i; + int err = GNTST_okay; /* 0 */ + + for (i = 0; i < nr_grefs; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + gnttab_set_map_op(&op[i], (phys_addr_t)addr, + GNTMAP_host_map, gnt_ref[i], + dev->otherend_id); + } - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs)) BUG(); - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; + for (i = 0; i < nr_grefs; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_fatal(dev, err, + "mapping in shared page %d from domain %d", + gnt_ref[i], dev->otherend_id); + handle[i] = INVALID_GRANT_HANDLE; + } else + handle[i] = op[i].handle; + } - return op.status; + if (err != GNTST_okay) + xenbus_unmap_ring(dev, handle, nr_grefs, vaddr); + + return err; } EXPORT_SYMBOL_GPL(xenbus_map_ring); @@ 
-605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) } EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, + struct xenbus_map_node *node) +{ + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; + unsigned int level; + int i, j; + int err = GNTST_okay; + + j = 0; + for (i = 0; i < node->nr_handles; i++) { + unsigned long vaddr = (unsigned long)node->area->addr + + (PAGE_SIZE * i); + if (node->handle[i] != INVALID_GRANT_HANDLE) { + memset(&op[j], 0, sizeof(op[0])); + op[j].host_addr = arbitrary_virt_to_machine( + lookup_address(vaddr, &level)).maddr; + op[j].handle = node->handle[i]; + j++; + node->handle[i] = INVALID_GRANT_HANDLE; + } + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) + BUG(); + + node->nr_handles = 0; + + for (i = 0; i < j; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_error(dev, err, + "unmapping page %d at handle %d error %d", + i, op[i].handle, err); + } + } + + if (err == GNTST_okay) + free_vm_area(node->area); + + kfree(node); + + return err; +} + static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; - unsigned int level; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) if (!node) { xenbus_dev_error(dev, -ENOENT, - "can''t find mapped virtual address %p", vaddr); + "can''t find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = node->handle; - op.host_addr = arbitrary_virt_to_machine( - lookup_address((unsigned long)vaddr, &level)).maddr; - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); - - if (op.status == GNTST_okay) - free_vm_area(node->area); - else - 
xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - node->handle, op.status); - - kfree(node); - return op.status; + return __xenbus_unmap_ring_vfree_pv(dev, node); } static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) { int rv; struct xenbus_map_node *node; - void *addr; + void *addr = NULL; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) if (!node) { xenbus_dev_error(dev, -ENOENT, - "can''t find mapped virtual address %p", vaddr); + "can''t find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - rv = xenbus_unmap_ring(dev, node->handle, addr); + rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr); if (!rv) - free_xenballooned_pages(1, &node->page); + free_xenballooned_pages(node->nr_handles, &node->page); else WARN(1, "Leaking %p\n", vaddr); @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) * xenbus_unmap_ring * @dev: xenbus device * @handle: grant handle + * @nr_handles: number of grant handle * @vaddr: addr to unmap * * Unmap a page of memory in this domain that was imported from another domain. @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) * (see xen/include/interface/grant_table.h). 
*/ int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) + grant_handle_t handle[], int nr_handles, + void *vaddr) { - struct gnttab_unmap_grant_ref op; - - gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; + int i, j; + int err = GNTST_okay; + + j = 0; + for (i = 0; i < nr_handles; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + if (handle[i] != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr, + GNTMAP_host_map, handle[i]); + handle[i] = INVALID_GRANT_HANDLE; + } + } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) BUG(); - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); + for (i = 0; i < j; i++) { + if (op[i].status != GNTST_okay) { + err = op[i].status; + xenbus_dev_error(dev, err, + "unmapping page at handle %d error %d", + handle[i], err); + } + } - return op.status; + return err; } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3864967..62b92d2 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void) return err; } +extern void xenbus_ring_ops_init(void); static int __init xenbus_init(void) { int err = 0; @@ -767,6 +768,8 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif + xenbus_ring_ops_init(); + out_error: return err; } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index e8c599b..cdbd948 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum 
xenbus_state new_state); -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr); + +#define XENBUS_MAX_RING_ORDER 2 +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER) + +#define INVALID_GRANT_HANDLE (~0U) + +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + int nr_pages, int grefs[]); +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], + int nr_grefs, void **vaddr); +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, + grant_handle_t handle[], void *vaddr); int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr); + grant_handle_t handle[], int nr_handles, + void *vaddr); int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
Jan Beulich
2012-Mar-06 08:34 UTC
Re: [PATCH 0001/001] xen: multi page ring support for block devices
>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:Could this be split up into 3 patches, for easier reviewing: - one adjusting the xenbus interface to allow for multiple ring pages (and maybe even that one should be split into the backend and frontend related parts), syncing with the similar netback effort? - one for the blkback changes - one for the blkfront changes?> --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) > return blkif; > } > > -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > - unsigned int evtchn) > +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],As you need to touch this anyway, can you please switch this to the proper type (grant_ref_t) rather than using plain "int" (not just here)?> + unsigned int ring_order, unsigned int evtchn) > { > int err; > > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); > static int get_id_from_freelist(struct blkfront_info *info) > { > unsigned long free = info->shadow_free; > - BUG_ON(free >= BLK_RING_SIZE); > + BUG_ON(free >= BLK_MAX_RING_SIZE);Wouldn''t you better check against the actual limit here?> info->shadow_free = info->shadow[free].req.u.rw.id; > info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ > return free; > @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) > flush_work_sync(&info->work); > > /* Free resources associated with old device channel. 
*/ > - if (info->ring_ref != GRANT_INVALID_REF) { > - gnttab_end_foreign_access(info->ring_ref, 0, > - (unsigned long)info->ring.sring); > - info->ring_ref = GRANT_INVALID_REF; > - info->ring.sring = NULL; > + for (i = 0; i < (1 << info->ring_order); i++) { > + if (info->ring_ref[i] != GRANT_INVALID_REF) { > + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); > + info->ring_ref[i] = GRANT_INVALID_REF; > + } > } > + > + free_pages((unsigned long)info->ring.sring, info->ring_order);No. The freeing must continue to happen in gnttab_end_foreign_access() (with the sole exception when a page was allocated but the grant didn't get established), since it must be suppressed/delayed when the grant is still in use (otherwise the kernel will die on the first re-use of the page). I just happened to fix that problem at the end of last week in the variant of the patch that we pulled into our tree. Further, rather than doing a non-zero order allocation here, I'd suggest allocating individual pages and vmap()-ing them.> + info->ring.sring = NULL; > + > if (info->irq) > unbind_from_irqhandler(info->irq, info); > info->evtchn = info->irq = 0; > - > } > > static void blkif_completion(struct blk_shadow *s) > @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, > { > const char *message = NULL; > struct xenbus_transaction xbt; > + unsigned int ring_order; > + int legacy_backend; > + int i; > int err; > > + for (i = 0; i < (1 << info->ring_order); i++) > + info->ring_ref[i] = GRANT_INVALID_REF; > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", > + &ring_order);At least the frontend should imo also support the alternative interface (using "max-ring-pages" etc).> + > + legacy_backend = !(err == 1); > + > + if (legacy_backend) { > + info->ring_order = 0; > + } else { > + info->ring_order = (ring_order <= xen_blkif_ring_order) ? > + ring_order : > + xen_blkif_ring_order;min()?> + } > + > /* Create shared ring, alloc event channel. 
*/ > err = setup_blkring(dev, info); > if (err) > @@ -889,12 +916,35 @@ again: > goto destroy_blkring; > } > > - err = xenbus_printf(xbt, dev->nodename, > - "ring-ref", "%u", info->ring_ref); > - if (err) { > - message = "writing ring-ref"; > - goto abort_transaction; > + if (legacy_backend) {Why not use the simpler interface always when info->ring_order == 0?> + err = xenbus_printf(xbt, dev->nodename, > + "ring-ref", "%d", info->ring_ref[0]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } else { > + for (i = 0; i < (1 << info->ring_order); i++) { > + char key[sizeof("ring-ref") + 2]; > + > + sprintf(key, "ring-ref%d", i); > + > + err = xenbus_printf(xbt, dev->nodename, > + key, "%d", info->ring_ref[i]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } > + > + err = xenbus_printf(xbt, dev->nodename, > + "ring-page-order", "%u", info->ring_order); > + if (err) { > + message = "writing ring-order"; > + goto abort_transaction; > + } > } > + > err = xenbus_printf(xbt, dev->nodename, > "event-channel", "%u", info->evtchn); > if (err) { > @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, > info->connected = BLKIF_STATE_DISCONNECTED; > INIT_WORK(&info->work, blkif_restart_queue); > > - for (i = 0; i < BLK_RING_SIZE; i++) > + for (i = 0; i < BLK_MAX_RING_SIZE; i++) > info->shadow[i].req.u.rw.id = i+1; > - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;A proper terminator must also be written in talk_to_blkback() once the actual ring size is known. Further, blkif_recover() must be able to deal with a change of the allowed upper bound.> /* Front end dir is a number, which is used as the id. 
*/ > info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); > dev_set_drvdata(&dev->dev, info); > > - err = talk_to_blkback(dev, info);Completely removing this here is wrong afaict - what if the backend already is in InitWait when the frontend starts? Further, whatever is done to this call here also needs to be done in blkfront_resume().> - if (err) { > - kfree(info); > - dev_set_drvdata(&dev->dev, NULL); > - return err; > - } > - > return 0; > } > > @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, > case XenbusStateClosed: > break; > > + case XenbusStateInitWait: > + talk_to_blkback(dev, info);This call can return an error.> + break; > + > case XenbusStateConnected: > blkfront_connect(info); > break; > --- a/include/xen/xenbus.h > +++ b/include/xen/xenbus.h > @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, > const char *pathfmt, ...); > > int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); > -int xenbus_map_ring_valloc(struct xenbus_device *dev, > - int gnt_ref, void **vaddr); > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - grant_handle_t *handle, void *vaddr); > + > +#define XENBUS_MAX_RING_ORDER 2 > +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER)Why do you need an artificial global limit here? Each driver can decide individually what its limit should be. Jan
Jan Beulich
2012-Mar-06 08:34 UTC
[PATCH 0001/001] xen: multi page ring support for block devices
>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh at citrix.com> wrote:Could this be split up into 3 patches, for easier reviewing: - one adjusting the xenbus interface to allow for multiple ring pages (and maybe even that one should be split into the backend and frontend related parts), syncing with the similar netback effort? - one for the blkback changes - one for the blkfront changes?> --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) > return blkif; > } > > -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > - unsigned int evtchn) > +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[],As you need to touch this anyway, can you please switch this to the proper type (grant_ref_t) rather than using plain "int" (not just here)?> + unsigned int ring_order, unsigned int evtchn) > { > int err; > > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); > static int get_id_from_freelist(struct blkfront_info *info) > { > unsigned long free = info->shadow_free; > - BUG_ON(free >= BLK_RING_SIZE); > + BUG_ON(free >= BLK_MAX_RING_SIZE);Wouldn't you better check against the actual limit here?> info->shadow_free = info->shadow[free].req.u.rw.id; > info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ > return free; > @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) > flush_work_sync(&info->work); > > /* Free resources associated with old device channel. 
*/ > - if (info->ring_ref != GRANT_INVALID_REF) { > - gnttab_end_foreign_access(info->ring_ref, 0, > - (unsigned long)info->ring.sring); > - info->ring_ref = GRANT_INVALID_REF; > - info->ring.sring = NULL; > + for (i = 0; i < (1 << info->ring_order); i++) { > + if (info->ring_ref[i] != GRANT_INVALID_REF) { > + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); > + info->ring_ref[i] = GRANT_INVALID_REF; > + } > } > + > + free_pages((unsigned long)info->ring.sring, info->ring_order);No. The freeing must continue happen in gnttab_end_foreign_access() (with the sole exception when a page was allocated but the grant didn't get established), since it must be suppressed/delayed when the grant is still in use (otherwise the kernel will die on the first re-use of the page). I just happened to fix that problem at the end of last week in the variant of the patch that we pulled into our tree. Further, rather than doing a non-zero order allocation here, I'd suggest allocating individual pages and vmap()-ing them.> + info->ring.sring = NULL; > + > if (info->irq) > unbind_from_irqhandler(info->irq, info); > info->evtchn = info->irq = 0; > - > } > > static void blkif_completion(struct blk_shadow *s) > @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, > { > const char *message = NULL; > struct xenbus_transaction xbt; > + unsigned int ring_order; > + int legacy_backend; > + int i; > int err; > > + for (i = 0; i < (1 << info->ring_order); i++) > + info->ring_ref[i] = GRANT_INVALID_REF; > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", > + &ring_order);At least the frontend should imo also support the alternative interface (using "max-ring-pages" etc).> + > + legacy_backend = !(err == 1); > + > + if (legacy_backend) { > + info->ring_order = 0; > + } else { > + info->ring_order = (ring_order <= xen_blkif_ring_order) ? > + ring_order : > + xen_blkif_ring_order;min()?> + } > + > /* Create shared ring, alloc event channel. 
*/ > err = setup_blkring(dev, info); > if (err) > @@ -889,12 +916,35 @@ again: > goto destroy_blkring; > } > > - err = xenbus_printf(xbt, dev->nodename, > - "ring-ref", "%u", info->ring_ref); > - if (err) { > - message = "writing ring-ref"; > - goto abort_transaction; > + if (legacy_backend) {Why not use the simpler interface always when info->ring_order == 0?> + err = xenbus_printf(xbt, dev->nodename, > + "ring-ref", "%d", info->ring_ref[0]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } else { > + for (i = 0; i < (1 << info->ring_order); i++) { > + char key[sizeof("ring-ref") + 2]; > + > + sprintf(key, "ring-ref%d", i); > + > + err = xenbus_printf(xbt, dev->nodename, > + key, "%d", info->ring_ref[i]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } > + > + err = xenbus_printf(xbt, dev->nodename, > + "ring-page-order", "%u", info->ring_order); > + if (err) { > + message = "writing ring-order"; > + goto abort_transaction; > + } > } > + > err = xenbus_printf(xbt, dev->nodename, > "event-channel", "%u", info->evtchn); > if (err) { > @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, > info->connected = BLKIF_STATE_DISCONNECTED; > INIT_WORK(&info->work, blkif_restart_queue); > > - for (i = 0; i < BLK_RING_SIZE; i++) > + for (i = 0; i < BLK_MAX_RING_SIZE; i++) > info->shadow[i].req.u.rw.id = i+1; > - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff;A proper terminator must also be written in talk_to_blkback() once the actual ring size is known. Further, blkif_recover() must be able to deal with a change of the allowed upper bound.> /* Front end dir is a number, which is used as the id. 
*/ > info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); > dev_set_drvdata(&dev->dev, info); > > - err = talk_to_blkback(dev, info);Completely removing this here is wrong afaict - what if the backend already is in InitWait when the frontend starts? Further, whatever is done to this call here also needs to be done in blkfront_resume().> - if (err) { > - kfree(info); > - dev_set_drvdata(&dev->dev, NULL); > - return err; > - } > - > return 0; > } > > @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, > case XenbusStateClosed: > break; > > + case XenbusStateInitWait: > + talk_to_blkback(dev, info);This call can return an error.> + break; > + > case XenbusStateConnected: > blkfront_connect(info); > break; > --- a/include/xen/xenbus.h > +++ b/include/xen/xenbus.h > @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, > const char *pathfmt, ...); > > int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); > -int xenbus_map_ring_valloc(struct xenbus_device *dev, > - int gnt_ref, void **vaddr); > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - grant_handle_t *handle, void *vaddr); > + > +#define XENBUS_MAX_RING_ORDER 2 > +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER)Why do you need an artificial global limit here? Each driver can decide individually what its limit should be. Jan
Wei Liu
2012-Mar-06 11:16 UTC
[Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
On Mon, 2012-03-05 at 21:49 +0000, Santosh Jodh wrote:> From: Santosh Jodh <santosh.jodh at citrix.com> > > Add support for multi page ring for block devices. > The number of pages is configurable for blkback via module parameter. > blkback reports max-ring-page-order to blkfront via xenstore. > blkfront reports its supported ring-page-order to blkback via xenstore. > blkfront reports multi page ring references via ring-refNN in xenstore. > The change allows newer blkfront to work with older blkback and > vice-versa. > Based on original patch by Paul Durrant. > > Signed-off-by: Santosh Jodh <santosh.jodh at citrix.com>Doesn't the xenbus interface change deserve another patch (as prerequisite for block devices change)? Or at least please mention the change in commit message? Wei.
Konrad Rzeszutek Wilk
2012-Mar-06 17:20 UTC
Re: [PATCH 0001/001] xen: multi page ring support for block devices
On Mon, Mar 5, 2012 at 4:49 PM, Santosh Jodh <Santosh.Jodh@citrix.com> wrote:> From: Santosh Jodh <santosh.jodh@citrix.com> > > Add support for multi page ring for block devices. > The number of pages is configurable for blkback via module parameter. > blkback reports max-ring-page-order to blkfront via xenstore. > blkfront reports its supported ring-page-order to blkback via xenstore. > blkfront reports multi page ring references via ring-refNN in xenstore. > The change allows newer blkfront to work with older blkback and > vice-versa. > Based on original patch by Paul Durrant.you should include his SoB in this patch. The patch overall looks Ok, though I do have some comments: -> the call to "xenbus_ring_ops_init();" looks like a bug-fix? If so, it should be a separate patch. -> the usage of XenbusStateInitWait? Why do we introduce that? Looks like a fix to something. -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal default size for SSD usage? 16? -> don't do sprintf, use snprintf -> don't use printk(KERN_..), use pr_info or the variant of pr_err,pr_debug, etc. -> don't split the printk contents. It is Ok for them to be more than 80 characters. -> check that xen_blkif_ring_order is under XENBUS_MAX_RING_PAGES. Otherwise a joker could do = 9999999999999999999 for ring size and we would try to use that. -> Separate the patch that introduces the changes to the XenBus infrastructure (and then the changes to net* and blk*) to use the extra arguments would be folded in that patch. Then the patch that implements the multi ring to blkback is a patch that depends on the XenBus modifications patch. Also make sure you CC David Miller and Jens Axboe on the XenBus patch as it modifies the net-* side which requires Ian's and David's Ack. -> Have you done a sanity/test check where the backend and frontend have different size rings? 
Just to make sure nothing explodes.> > Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com> > --- > diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c > index 0088bf6..72f2e18 100644 > --- a/drivers/block/xen-blkback/blkback.c > +++ b/drivers/block/xen-blkback/blkback.c > @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64; > module_param_named(reqs, xen_blkif_reqs, int, 0); > MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); > > +/* Order of maximum shared ring size advertised to the front end. */ > +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; > + > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > + > +static int set_max_ring_order(const char *buf, struct kernel_param *kp) > +{ > + int err; > + unsigned long order; > + > + err = kstrtol(buf, 0, &order); > + if (err || > + order < 0 || > + order > XENBUS_MAX_RING_ORDER) > + return -EINVAL; > + > + if (xen_blkif_reqs < BLK_RING_SIZE(order)) > + printk(KERN_WARNING "WARNING: " > + "I/O request space (%d reqs) < ring order %ld, " > + "consider increasing %s.reqs to >= %ld.", > + xen_blkif_reqs, order, KBUILD_MODNAME, > + roundup_pow_of_two(BLK_RING_SIZE(order))); > + > + xen_blkif_max_ring_order = order; > + > + return 0; > +} > + > +module_param_call(max_ring_order, > + set_max_ring_order, param_get_int, > + &xen_blkif_max_ring_order, 0644); > +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages."); > + > /* Run-time switchable: /sys/module/blkback/parameters/ */ > static unsigned int log_stats; > module_param(log_stats, int, 0644); > diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h > index d0ee7ed..5f33a1a 100644 > --- a/drivers/block/xen-blkback/common.h > +++ b/drivers/block/xen-blkback/common.h > @@ -126,6 +126,8 @@ struct blkif_x86_64_response { > int16_t status; /* BLKIF_RSP_??? 
*/ > }; > > +extern int xen_blkif_max_ring_order; > + > DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, > struct blkif_common_response); > DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 24a2fb5..7a9d71d 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) > return blkif; > } > > -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > - unsigned int evtchn) > +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[], > + unsigned int ring_order, unsigned int evtchn) > { > int err; > > @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > if (blkif->irq) > return 0; > > - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); > + err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order, > + &blkif->blk_ring); > if (err < 0) > return err; > > @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > { > struct blkif_sring *sring; > sring = (struct blkif_sring *)blkif->blk_ring; > - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); > + BACK_RING_INIT(&blkif->blk_rings.native, sring, > + PAGE_SIZE << ring_order); > break; > } > case BLKIF_PROTOCOL_X86_32: > { > struct blkif_x86_32_sring *sring_x86_32; > sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; > - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); > + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, > + PAGE_SIZE << ring_order); > break; > } > case BLKIF_PROTOCOL_X86_64: > { > struct blkif_x86_64_sring *sring_x86_64; > sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; > - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); > + 
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, > + PAGE_SIZE << ring_order); > break; > } > default: > @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, > if (err) > goto fail; > > + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", > + "%u", xen_blkif_max_ring_order); > + if (err) > + goto fail; > + > err = xenbus_switch_state(dev, XenbusStateInitWait); > if (err) > goto fail; > @@ -744,22 +753,80 @@ again: > static int connect_ring(struct backend_info *be) > { > struct xenbus_device *dev = be->dev; > - unsigned long ring_ref; > + int ring_ref[XENBUS_MAX_RING_PAGES]; > + unsigned int ring_order; > unsigned int evtchn; > char protocol[64] = ""; > int err; > > DPRINTK("%s", dev->otherend); > > - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", > - &ring_ref, "event-channel", "%u", &evtchn, NULL); > - if (err) { > - xenbus_dev_fatal(dev, err, > - "reading %s/ring-ref and event-channel", > + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", > + &evtchn); > + if (err != 1) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, "reading %s/event-channel", > dev->otherend); > return err; > } > > + printk(KERN_INFO "blkback: event-channel %u\n", evtchn); > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", > + &ring_order); > + if (err != 1) { > + DPRINTK("%s: using single page handshake", dev->otherend); > + > + ring_order = 0; > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", > + "%d", &ring_ref[0]); > + if (err != 1) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", > + dev->otherend); > + return err; > + } > + > + printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]); > + } else { > + unsigned int i; > + > + if (ring_order > xen_blkif_max_ring_order) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, > + "%s/ring-page-order too big", > + dev->otherend); > + return err; > + } > + > + for (i = 0; i < (1u << ring_order); 
i++) { > + char ring_ref_name[10]; > + > + snprintf(ring_ref_name, sizeof(ring_ref_name), > + "ring-ref%u", i); > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, > + ring_ref_name, "%d", > + &ring_ref[i]); > + if (err != 1) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, > + "reading %s/%s", > + dev->otherend, > + ring_ref_name); > + return err; > + } > + > + printk(KERN_INFO "blkback: ring-ref%u %d\n", i, > + ring_ref[i]); > + } > + } > + > be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; > err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", > "%63s", protocol, NULL); > @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be) > xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); > return -1; > } > - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", > - ring_ref, evtchn, be->blkif->blk_protocol, protocol); > > /* Map the shared frame, irq etc. */ > - err = xen_blkif_map(be->blkif, ring_ref, evtchn); > + err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn); > if (err) { > - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", > - ring_ref, evtchn); > + xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn"); > return err; > } > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 2f22874..485813a 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -57,6 +57,10 @@ > > #include <asm/xen/hypervisor.h> > > +static int xen_blkif_ring_order; > +module_param_named(reqs, xen_blkif_ring_order, int, 0); > +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages."); > + > enum blkif_state { > BLKIF_STATE_DISCONNECTED, > BLKIF_STATE_CONNECTED, > @@ -72,7 +76,8 @@ struct blk_shadow { > static DEFINE_MUTEX(blkfront_mutex); > static const struct block_device_operations xlvbd_block_fops; > > -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > 
+#define BLK_MAX_RING_SIZE BLK_RING_SIZE(XENBUS_MAX_RING_ORDER) > > /* > * We have one of these per vbd, whether ide, scsi or ''other''. They > @@ -87,14 +92,15 @@ struct blkfront_info > int vdevice; > blkif_vdev_t handle; > enum blkif_state connected; > - int ring_ref; > + int ring_ref[XENBUS_MAX_RING_PAGES]; > + int ring_order; > struct blkif_front_ring ring; > struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > unsigned int evtchn, irq; > struct request_queue *rq; > struct work_struct work; > struct gnttab_free_callback callback; > - struct blk_shadow shadow[BLK_RING_SIZE]; > + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; > unsigned long shadow_free; > unsigned int feature_flush; > unsigned int flush_op; > @@ -111,9 +117,7 @@ static unsigned int nr_minors; > static unsigned long *minors; > static DEFINE_SPINLOCK(minor_lock); > > -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ > - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) > -#define GRANT_INVALID_REF 0 > +#define GRANT_INVALID_REF 0 > > #define PARTS_PER_DISK 16 > #define PARTS_PER_EXT_DISK 256 > @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); > static int get_id_from_freelist(struct blkfront_info *info) > { > unsigned long free = info->shadow_free; > - BUG_ON(free >= BLK_RING_SIZE); > + BUG_ON(free >= BLK_MAX_RING_SIZE); > info->shadow_free = info->shadow[free].req.u.rw.id; > info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ > return free; > @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work) > > static void blkif_free(struct blkfront_info *info, int suspend) > { > + int i; > + > /* Prevent new requests being issued until we fix things up. */ > spin_lock_irq(&blkif_io_lock); > info->connected = suspend ? > @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) > flush_work_sync(&info->work); > > /* Free resources associated with old device channel. 
*/ > - if (info->ring_ref != GRANT_INVALID_REF) { > - gnttab_end_foreign_access(info->ring_ref, 0, > - (unsigned long)info->ring.sring); > - info->ring_ref = GRANT_INVALID_REF; > - info->ring.sring = NULL; > + for (i = 0; i < (1 << info->ring_order); i++) { > + if (info->ring_ref[i] != GRANT_INVALID_REF) { > + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); > + info->ring_ref[i] = GRANT_INVALID_REF; > + } > } > + > + free_pages((unsigned long)info->ring.sring, info->ring_order); > + info->ring.sring = NULL; > + > if (info->irq) > unbind_from_irqhandler(info->irq, info); > info->evtchn = info->irq = 0; > - > } > > static void blkif_completion(struct blk_shadow *s) > @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev, > struct blkif_sring *sring; > int err; > > - info->ring_ref = GRANT_INVALID_REF; > - > - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); > + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, > + info->ring_order); > if (!sring) { > xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); > return -ENOMEM; > } > SHARED_RING_INIT(sring); > - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); > + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order); > > sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); > + err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order, > + info->ring_ref); > if (err < 0) { > - free_page((unsigned long)sring); > + free_pages((unsigned long)sring, info->ring_order); > info->ring.sring = NULL; > goto fail; > } > - info->ring_ref = err; > > err = xenbus_alloc_evtchn(dev, &info->evtchn); > if (err) > @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, > { > const char *message = NULL; > struct xenbus_transaction xbt; > + unsigned int ring_order; > + int legacy_backend; > + int i; > int err; > > + for (i = 0; i < (1 << info->ring_order); i++) > + 
info->ring_ref[i] = GRANT_INVALID_REF; > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", > + &ring_order); > + > + legacy_backend = !(err == 1); > + > + if (legacy_backend) { > + info->ring_order = 0; > + } else { > + info->ring_order = (ring_order <= xen_blkif_ring_order) ? > + ring_order : > + xen_blkif_ring_order; > + } > + > /* Create shared ring, alloc event channel. */ > err = setup_blkring(dev, info); > if (err) > @@ -889,12 +916,35 @@ again: > goto destroy_blkring; > } > > - err = xenbus_printf(xbt, dev->nodename, > - "ring-ref", "%u", info->ring_ref); > - if (err) { > - message = "writing ring-ref"; > - goto abort_transaction; > + if (legacy_backend) { > + err = xenbus_printf(xbt, dev->nodename, > + "ring-ref", "%d", info->ring_ref[0]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } else { > + for (i = 0; i < (1 << info->ring_order); i++) { > + char key[sizeof("ring-ref") + 2]; > + > + sprintf(key, "ring-ref%d", i); > + > + err = xenbus_printf(xbt, dev->nodename, > + key, "%d", info->ring_ref[i]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } > + > + err = xenbus_printf(xbt, dev->nodename, > + "ring-page-order", "%u", info->ring_order); > + if (err) { > + message = "writing ring-order"; > + goto abort_transaction; > + } > } > + > err = xenbus_printf(xbt, dev->nodename, > "event-channel", "%u", info->evtchn); > if (err) { > @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, > info->connected = BLKIF_STATE_DISCONNECTED; > INIT_WORK(&info->work, blkif_restart_queue); > > - for (i = 0; i < BLK_RING_SIZE; i++) > + for (i = 0; i < BLK_MAX_RING_SIZE; i++) > info->shadow[i].req.u.rw.id = i+1; > - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > /* Front end dir is a number, which is used as the id. 
*/ > info->handle = simple_strtoul(strrchr(dev->nodename, ''/'')+1, NULL, 0); > dev_set_drvdata(&dev->dev, info); > > - err = talk_to_blkback(dev, info); > - if (err) { > - kfree(info); > - dev_set_drvdata(&dev->dev, NULL); > - return err; > - } > - > return 0; > } > > @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info) > > /* Stage 2: Set up free list. */ > memset(&info->shadow, 0, sizeof(info->shadow)); > - for (i = 0; i < BLK_RING_SIZE; i++) > + for (i = 0; i < BLK_MAX_RING_SIZE; i++) > info->shadow[i].req.u.rw.id = i+1; > info->shadow_free = info->ring.req_prod_pvt; > - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > /* Stage 3: Find pending requests and requeue them. */ > - for (i = 0; i < BLK_RING_SIZE; i++) { > + for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) { > /* Not in use? */ > if (!copy[i].request) > continue; > @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev, > > switch (backend_state) { > case XenbusStateInitialising: > - case XenbusStateInitWait: > case XenbusStateInitialised: > case XenbusStateReconfiguring: > case XenbusStateReconfigured: > @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, > case XenbusStateClosed: > break; > > + case XenbusStateInitWait: > + talk_to_blkback(dev, info); > + break; > + > case XenbusStateConnected: > blkfront_connect(info); > break; > diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h > index 94b79c3..f93b59a 100644 > --- a/drivers/net/xen-netback/common.h > +++ b/drivers/net/xen-netback/common.h > @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif); > /* (Un)Map communication rings. 
*/ > void xen_netbk_unmap_frontend_rings(struct xenvif *vif); > int xen_netbk_map_frontend_rings(struct xenvif *vif, > - grant_ref_t tx_ring_ref, > - grant_ref_t rx_ring_ref); > + int tx_ring_ref, > + int rx_ring_ref); > > /* (De)Register a xenvif with the netback backend. */ > void xen_netbk_add_xenvif(struct xenvif *vif); > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > index 59effac..0b014cf 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif) > } > > int xen_netbk_map_frontend_rings(struct xenvif *vif, > - grant_ref_t tx_ring_ref, > - grant_ref_t rx_ring_ref) > + int tx_ring_ref, > + int rx_ring_ref) > { > void *addr; > struct xen_netif_tx_sring *txs; > @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, > int err = -ENOMEM; > > err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), > - tx_ring_ref, &addr); > + &tx_ring_ref, 1, &addr); > if (err) > goto err; > > @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, > BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); > > err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), > - rx_ring_ref, &addr); > + &rx_ring_ref, 1, &addr); > if (err) > goto err; > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > index 698b905..521a595 100644 > --- a/drivers/net/xen-netfront.c > +++ b/drivers/net/xen-netfront.c > @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) > SHARED_RING_INIT(txs); > FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); > > - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); > + err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref); > if (err < 0) { > free_page((unsigned long)txs); > goto fail; > } > > - info->tx_ring_ref = err; > rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); > if (!rxs) { > err = -ENOMEM; > 
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) > SHARED_RING_INIT(rxs); > FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); > > - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); > + err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref); > if (err < 0) { > free_page((unsigned long)rxs); > goto fail; > } > - info->rx_ring_ref = err; > > err = xenbus_alloc_evtchn(dev, &info->evtchn); > if (err) > diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c > index 1620088..95109d8 100644 > --- a/drivers/pci/xen-pcifront.c > +++ b/drivers/pci/xen-pcifront.c > @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev) > int err = 0; > struct xenbus_transaction trans; > > - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); > + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref); > if (err < 0) > goto out; > > - pdev->gnt_ref = err; > - > err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); > if (err) > goto out; > diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c > index 64b11f9..e0834cd 100644 > --- a/drivers/xen/xen-pciback/xenbus.c > +++ b/drivers/xen/xen-pciback/xenbus.c > @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, > "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", > gnt_ref, remote_evtchn); > > - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); > + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); > if (err < 0) { > xenbus_dev_fatal(pdev->xdev, err, > "Error mapping other domain page in ours."); > diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c > index 566d2ad..3a14524 100644 > --- a/drivers/xen/xenbus/xenbus_client.c > +++ b/drivers/xen/xenbus/xenbus_client.c > @@ -53,14 +53,16 @@ struct xenbus_map_node { > struct vm_struct *area; /* PV */ > struct page *page; /* HVM */ > }; > - grant_handle_t 
handle; > + grant_handle_t handle[XENBUS_MAX_RING_PAGES]; > + unsigned int nr_handles; > }; > > static DEFINE_SPINLOCK(xenbus_valloc_lock); > static LIST_HEAD(xenbus_valloc_pages); > > struct xenbus_ring_ops { > - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); > + int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts, > + void **vaddr); > int (*unmap)(struct xenbus_device *dev, void *vaddr); > }; > > @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, > /** > * xenbus_grant_ring > * @dev: xenbus device > - * @ring_mfn: mfn of ring to grant > - > - * Grant access to the given @ring_mfn to the peer of the given device. Return > - * 0 on success, or -errno on error. On error, the device will switch to > - * XenbusStateClosing, and the error will be saved in the store. > + * @vaddr: starting virtual address of the ring > + * @nr_pages: number of page to be granted > + * @grefs: grant reference array to be filled in > + * Grant access to the given @vaddr to the peer of the given device. > + * Then fill in @grefs with grant references. Return 0 on success, or > + * -errno on error. On error, the device will switch to > + * XenbusStateClosing, and the first error will be saved in the store. 
> */ > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, > + int nr_pages, int grefs[]) > { > - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); > - if (err < 0) > - xenbus_dev_fatal(dev, err, "granting access to ring page"); > + int i; > + int err; > + > + for (i = 0; i < nr_pages; i++) { > + unsigned long addr = (unsigned long)vaddr + > + (PAGE_SIZE * i); > + err = gnttab_grant_foreign_access(dev->otherend_id, > + virt_to_mfn(addr), 0); > + if (err < 0) { > + xenbus_dev_fatal(dev, err, > + "granting access to ring page"); > + goto fail; > + } > + grefs[i] = err; > + } > + > + return 0; > + > +fail: > + for ( ; i >= 0; i--) > + gnttab_end_foreign_access_ref(grefs[i], 0); > return err; > } > EXPORT_SYMBOL_GPL(xenbus_grant_ring); > @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); > /** > * xenbus_map_ring_valloc > * @dev: xenbus device > - * @gnt_ref: grant reference > + * @gnt_ref: grant reference array > + * @nr_grefs: number of grant reference > * @vaddr: pointer to address to be filled out by mapping > * > * Based on Rusty Russell''s skeleton driver''s map_page. > @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); > * or -ENOMEM on error. If an error is returned, device will switch to > * XenbusStateClosing and the error message will be saved in XenStore. 
> */ > -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) > +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], > + int nr_grefs, void **vaddr) > { > - return ring_ops->map(dev, gnt_ref, vaddr); > + return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr); > } > EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); > > +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, > + struct xenbus_map_node *node); > + > static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > - int gnt_ref, void **vaddr) > + int gnt_ref[], int nr_grefs, void **vaddr) > { > - struct gnttab_map_grant_ref op = { > - .flags = GNTMAP_host_map | GNTMAP_contains_pte, > - .ref = gnt_ref, > - .dom = dev->otherend_id, > - }; > + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; > struct xenbus_map_node *node; > struct vm_struct *area; > - pte_t *pte; > + pte_t *pte[XENBUS_MAX_RING_PAGES]; > + int i; > + int err = 0; > + > + if (nr_grefs > XENBUS_MAX_RING_PAGES) > + return -EINVAL; > > *vaddr = NULL; > > @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > if (!node) > return -ENOMEM; > > - area = alloc_vm_area(PAGE_SIZE, &pte); > + area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte); > if (!area) { > kfree(node); > return -ENOMEM; > } > > - op.host_addr = arbitrary_virt_to_machine(pte).maddr; > + for (i = 0; i < nr_grefs; i++) { > + op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte, > + op[i].ref = gnt_ref[i], > + op[i].dom = dev->otherend_id, > + op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr; > + }; > > if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) > BUG(); > > - if (op.status != GNTST_okay) { > - free_vm_area(area); > - kfree(node); > - xenbus_dev_fatal(dev, op.status, > - "mapping in shared page %d from domain %d", > - gnt_ref, dev->otherend_id); > - return op.status; > + node->nr_handles = nr_grefs; > + node->area = area; > + > + for (i = 0; i < nr_grefs; i++) { > + if 
(op[i].status != GNTST_okay) { > + err = op[i].status; > + node->handle[i] = INVALID_GRANT_HANDLE; > + continue; > + } > + node->handle[i] = op[i].handle; > } > > - node->handle = op.handle; > - node->area = area; > + if (err != 0) { > + for (i = 0; i < nr_grefs; i++) > + xenbus_dev_fatal(dev, op[i].status, > + "mapping in shared page %d from domain %d", > + gnt_ref[i], dev->otherend_id); > + > + __xenbus_unmap_ring_vfree_pv(dev, node); > + > + return err; > + } > > spin_lock(&xenbus_valloc_lock); > list_add(&node->next, &xenbus_valloc_pages); > @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > } > > static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > - int gnt_ref, void **vaddr) > + int gnt_ref[], int nr_grefs, void **vaddr) > { > struct xenbus_map_node *node; > int err; > void *addr; > > + if (nr_grefs > XENBUS_MAX_RING_PAGES) > + return -EINVAL; > + > *vaddr = NULL; > > node = kzalloc(sizeof(*node), GFP_KERNEL); > if (!node) > return -ENOMEM; > > - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); > + err = alloc_xenballooned_pages(nr_grefs, &node->page, > + false /* lowmem */); > if (err) > goto out_err; > > addr = pfn_to_kaddr(page_to_pfn(node->page)); > > - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); > + err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr); > if (err) > goto out_err; > > @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > return 0; > > out_err: > - free_xenballooned_pages(1, &node->page); > + free_xenballooned_pages(nr_grefs, &node->page); > kfree(node); > return err; > } > @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > /** > * xenbus_map_ring > * @dev: xenbus device > - * @gnt_ref: grant reference > - * @handle: pointer to grant handle to be filled > + * @gnt_ref: grant reference array > + * @nr_grefs: number of grant references > + * @handle: pointer to grant handle 
array to be filled, mind the size > * @vaddr: address to be mapped to > * > - * Map a page of memory into this domain from another domain''s grant table. > + * Map pages of memory into this domain from another domain''s grant table. > * xenbus_map_ring does not allocate the virtual address space (you must do > - * this yourself!). It only maps in the page to the specified address. > + * this yourself!). It only maps in the pages to the specified address. > * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) > * or -ENOMEM on error. If an error is returned, device will switch to > - * XenbusStateClosing and the error message will be saved in XenStore. > + * XenbusStateClosing and the last error message will be saved in XenStore. > */ > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - grant_handle_t *handle, void *vaddr) > +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, > + grant_handle_t handle[], void *vaddr) > { > - struct gnttab_map_grant_ref op; > - > - gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, > - dev->otherend_id); > + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; > + int i; > + int err = GNTST_okay; /* 0 */ > + > + for (i = 0; i < nr_grefs; i++) { > + unsigned long addr = (unsigned long)vaddr + > + (PAGE_SIZE * i); > + gnttab_set_map_op(&op[i], (phys_addr_t)addr, > + GNTMAP_host_map, gnt_ref[i], > + dev->otherend_id); > + } > > - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) > + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs)) > BUG(); > > - if (op.status != GNTST_okay) { > - xenbus_dev_fatal(dev, op.status, > - "mapping in shared page %d from domain %d", > - gnt_ref, dev->otherend_id); > - } else > - *handle = op.handle; > + for (i = 0; i < nr_grefs; i++) { > + if (op[i].status != GNTST_okay) { > + err = op[i].status; > + xenbus_dev_fatal(dev, err, > + "mapping in shared page %d from domain %d", > + gnt_ref[i], 
dev->otherend_id); > + handle[i] = INVALID_GRANT_HANDLE; > + } else > + handle[i] = op[i].handle; > + } > > - return op.status; > + if (err != GNTST_okay) > + xenbus_unmap_ring(dev, handle, nr_grefs, vaddr); > + > + return err; > } > EXPORT_SYMBOL_GPL(xenbus_map_ring); > > @@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) > } > EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); > > +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, > + struct xenbus_map_node *node) > +{ > + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; > + unsigned int level; > + int i, j; > + int err = GNTST_okay; > + > + j = 0; > + for (i = 0; i < node->nr_handles; i++) { > + unsigned long vaddr = (unsigned long)node->area->addr + > + (PAGE_SIZE * i); > + if (node->handle[i] != INVALID_GRANT_HANDLE) { > + memset(&op[j], 0, sizeof(op[0])); > + op[j].host_addr = arbitrary_virt_to_machine( > + lookup_address(vaddr, &level)).maddr; > + op[j].handle = node->handle[i]; > + j++; > + node->handle[i] = INVALID_GRANT_HANDLE; > + } > + } > + > + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) > + BUG(); > + > + node->nr_handles = 0; > + > + for (i = 0; i < j; i++) { > + if (op[i].status != GNTST_okay) { > + err = op[i].status; > + xenbus_dev_error(dev, err, > + "unmapping page %d at handle %d error %d", > + i, op[i].handle, err); > + } > + } > + > + if (err == GNTST_okay) > + free_vm_area(node->area); > + > + kfree(node); > + > + return err; > +} > + > static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) > { > struct xenbus_map_node *node; > - struct gnttab_unmap_grant_ref op = { > - .host_addr = (unsigned long)vaddr, > - }; > - unsigned int level; > > spin_lock(&xenbus_valloc_lock); > list_for_each_entry(node, &xenbus_valloc_pages, next) { > @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) > > if (!node) { > xenbus_dev_error(dev, -ENOENT, > - "can''t find 
mapped virtual address %p", vaddr); > + "can''t find mapped virtual address %p", vaddr); > return GNTST_bad_virt_addr; > } > > - op.handle = node->handle; > - op.host_addr = arbitrary_virt_to_machine( > - lookup_address((unsigned long)vaddr, &level)).maddr; > - > - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) > - BUG(); > - > - if (op.status == GNTST_okay) > - free_vm_area(node->area); > - else > - xenbus_dev_error(dev, op.status, > - "unmapping page at handle %d error %d", > - node->handle, op.status); > - > - kfree(node); > - return op.status; > + return __xenbus_unmap_ring_vfree_pv(dev, node); > } > > static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > { > int rv; > struct xenbus_map_node *node; > - void *addr; > + void *addr = NULL; > > spin_lock(&xenbus_valloc_lock); > list_for_each_entry(node, &xenbus_valloc_pages, next) { > @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > > if (!node) { > xenbus_dev_error(dev, -ENOENT, > - "can''t find mapped virtual address %p", vaddr); > + "can''t find mapped virtual address %p", vaddr); > return GNTST_bad_virt_addr; > } > > - rv = xenbus_unmap_ring(dev, node->handle, addr); > + rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr); > > if (!rv) > - free_xenballooned_pages(1, &node->page); > + free_xenballooned_pages(node->nr_handles, &node->page); > else > WARN(1, "Leaking %p\n", vaddr); > > @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > * xenbus_unmap_ring > * @dev: xenbus device > * @handle: grant handle > + * @nr_handles: number of grant handle > * @vaddr: addr to unmap > * > * Unmap a page of memory in this domain that was imported from another domain. > @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > * (see xen/include/interface/grant_table.h). 
> */ > int xenbus_unmap_ring(struct xenbus_device *dev, > - grant_handle_t handle, void *vaddr) > + grant_handle_t handle[], int nr_handles, > + void *vaddr) > { > - struct gnttab_unmap_grant_ref op; > - > - gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); > + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; > + int i, j; > + int err = GNTST_okay; > + > + j = 0; > + for (i = 0; i < nr_handles; i++) { > + unsigned long addr = (unsigned long)vaddr + > + (PAGE_SIZE * i); > + if (handle[i] != INVALID_GRANT_HANDLE) { > + gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr, > + GNTMAP_host_map, handle[i]); > + handle[i] = INVALID_GRANT_HANDLE; > + } > + } > > - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) > + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) > BUG(); > > - if (op.status != GNTST_okay) > - xenbus_dev_error(dev, op.status, > - "unmapping page at handle %d error %d", > - handle, op.status); > + for (i = 0; i < j; i++) { > + if (op[i].status != GNTST_okay) { > + err = op[i].status; > + xenbus_dev_error(dev, err, > + "unmapping page at handle %d error %d", > + handle[i], err); > + } > + } > > - return op.status; > + return err; > } > EXPORT_SYMBOL_GPL(xenbus_unmap_ring); > > diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c > index 3864967..62b92d2 100644 > --- a/drivers/xen/xenbus/xenbus_probe.c > +++ b/drivers/xen/xenbus/xenbus_probe.c > @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void) > return err; > } > > +extern void xenbus_ring_ops_init(void); > static int __init xenbus_init(void) > { > int err = 0; > @@ -767,6 +768,8 @@ static int __init xenbus_init(void) > proc_mkdir("xen", NULL); > #endif > > + xenbus_ring_ops_init(); > + > out_error: > return err; > } > diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h > index e8c599b..cdbd948 100644 > --- a/include/xen/xenbus.h > +++ b/include/xen/xenbus.h > @@ -195,15 +195,23 @@ int 
xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, > const char *pathfmt, ...); > > int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); > -int xenbus_map_ring_valloc(struct xenbus_device *dev, > - int gnt_ref, void **vaddr); > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - grant_handle_t *handle, void *vaddr); > + > +#define XENBUS_MAX_RING_ORDER 2 > +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER) > + > +#define INVALID_GRANT_HANDLE (~0U) > + > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, > + int nr_pages, int grefs[]); > +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], > + int nr_grefs, void **vaddr); > +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, > + grant_handle_t handle[], void *vaddr); > > int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); > int xenbus_unmap_ring(struct xenbus_device *dev, > - grant_handle_t handle, void *vaddr); > + grant_handle_t handle[], int nr_handles, > + void *vaddr); > > int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); > int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >
Konrad Rzeszutek Wilk
2012-Mar-06 17:20 UTC
[Xen-devel] [PATCH 0001/001] xen: multi page ring support for block devices
On Mon, Mar 5, 2012 at 4:49 PM, Santosh Jodh <Santosh.Jodh at citrix.com> wrote: > From: Santosh Jodh <santosh.jodh at citrix.com> > > Add support for multi page ring for block devices. > The number of pages is configurable for blkback via module parameter. > blkback reports max-ring-page-order to blkfront via xenstore. > blkfront reports its supported ring-page-order to blkback via xenstore. > blkfront reports multi page ring references via ring-refNN in xenstore. > The change allows newer blkfront to work with older blkback and > vice-versa. > Based on original patch by Paul Durrant. You should include his SoB in this patch. The patch overall looks Ok, though I do have some comments: -> the call to "xenbus_ring_ops_init();" looks like a bug-fix? If so, it should be a separate patch. -> the usage of XenbusStateInitWait? Why do we introduce that? Looks like a fix to something. -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal default size for SSD usage? 16? -> don't do sprintf, use snprintf -> don't use printk(KERN_..), use pr_info or the variant of pr_err, pr_debug, etc. -> don't split the printk contents. It is Ok for them to be more than 80 characters. -> check that xen_blkif_ring_order is under XENBUS_MAX_RING_PAGES. Otherwise a joker could do = 9999999999999999999 for ring size and we would try to use that. -> Separate out the patch that introduces the changes to the XenBus infrastructure (the changes to net* and blk* to use the extra arguments would then be folded into that patch). Then the patch that implements the multi ring to blkback is a patch that depends on the XenBus modifications patch. Also make sure you CC David Miller and Jens Axboe on the XenBus patch as it modifies the net-* side which requires Ian's and David's Ack. -> Have you done a sanity/test check where the backend and frontend have different size rings? 
Just to make sure nothing explodes.> > Signed-off-by: Santosh Jodh <santosh.jodh at citrix.com> > --- > diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c > index 0088bf6..72f2e18 100644 > --- a/drivers/block/xen-blkback/blkback.c > +++ b/drivers/block/xen-blkback/blkback.c > @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64; > ?module_param_named(reqs, xen_blkif_reqs, int, 0); > ?MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); > > +/* Order of maximum shared ring size advertised to the front end. */ > +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; > + > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > + > +static int set_max_ring_order(const char *buf, struct kernel_param *kp) > +{ > + ? ? ? int err; > + ? ? ? unsigned long order; > + > + ? ? ? err = kstrtol(buf, 0, &order); > + ? ? ? if (err || > + ? ? ? ? ? order < 0 || > + ? ? ? ? ? order > XENBUS_MAX_RING_ORDER) > + ? ? ? ? ? ? ? return -EINVAL; > + > + ? ? ? if (xen_blkif_reqs < BLK_RING_SIZE(order)) > + ? ? ? ? ? ? ? printk(KERN_WARNING "WARNING: " > + ? ? ? ? ? ? ? ? ? ? ?"I/O request space (%d reqs) < ring order %ld, " > + ? ? ? ? ? ? ? ? ? ? ?"consider increasing %s.reqs to >= %ld.", > + ? ? ? ? ? ? ? ? ? ? ?xen_blkif_reqs, order, KBUILD_MODNAME, > + ? ? ? ? ? ? ? ? ? ? ?roundup_pow_of_two(BLK_RING_SIZE(order))); > + > + ? ? ? xen_blkif_max_ring_order = order; > + > + ? ? ? return 0; > +} > + > +module_param_call(max_ring_order, > + ? ? ? ? ? ? ? ? set_max_ring_order, param_get_int, > + ? ? ? ? ? ? ? ? 
&xen_blkif_max_ring_order, 0644); > +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages."); > + > ?/* Run-time switchable: /sys/module/blkback/parameters/ */ > ?static unsigned int log_stats; > ?module_param(log_stats, int, 0644); > diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h > index d0ee7ed..5f33a1a 100644 > --- a/drivers/block/xen-blkback/common.h > +++ b/drivers/block/xen-blkback/common.h > @@ -126,6 +126,8 @@ struct blkif_x86_64_response { > ? ? ? ?int16_t ? ? ? ? status; ? ? ? ? ?/* BLKIF_RSP_??? ? ? ? */ > ?}; > > +extern int xen_blkif_max_ring_order; > + > ?DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, > ? ? ? ? ? ? ? ? ?struct blkif_common_response); > ?DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 24a2fb5..7a9d71d 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) > ? ? ? ?return blkif; > ?} > > -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > - ? ? ? ? ? ? ? ? ? ? ? ?unsigned int evtchn) > +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[], > + ? ? ? ? ? ? ? ? ? ? ? ?unsigned int ring_order, unsigned int evtchn) > ?{ > ? ? ? ?int err; > > @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > ? ? ? ?if (blkif->irq) > ? ? ? ? ? ? ? ?return 0; > > - ? ? ? err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); > + ? ? ? err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&blkif->blk_ring); > ? ? ? ?if (err < 0) > ? ? ? ? ? ? ? ?return err; > > @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > ? ? ? ?{ > ? ? ? ? ? ? ? 
?struct blkif_sring *sring; > ? ? ? ? ? ? ? ?sring = (struct blkif_sring *)blkif->blk_ring; > - ? ? ? ? ? ? ? BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); > + ? ? ? ? ? ? ? BACK_RING_INIT(&blkif->blk_rings.native, sring, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?PAGE_SIZE << ring_order); > ? ? ? ? ? ? ? ?break; > ? ? ? ?} > ? ? ? ?case BLKIF_PROTOCOL_X86_32: > ? ? ? ?{ > ? ? ? ? ? ? ? ?struct blkif_x86_32_sring *sring_x86_32; > ? ? ? ? ? ? ? ?sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; > - ? ? ? ? ? ? ? BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); > + ? ? ? ? ? ? ? BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?PAGE_SIZE << ring_order); > ? ? ? ? ? ? ? ?break; > ? ? ? ?} > ? ? ? ?case BLKIF_PROTOCOL_X86_64: > ? ? ? ?{ > ? ? ? ? ? ? ? ?struct blkif_x86_64_sring *sring_x86_64; > ? ? ? ? ? ? ? ?sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; > - ? ? ? ? ? ? ? BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); > + ? ? ? ? ? ? ? BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?PAGE_SIZE << ring_order); > ? ? ? ? ? ? ? ?break; > ? ? ? ?} > ? ? ? ?default: > @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto fail; > > + ? ? ? err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", > + ? ? ? ? ? ? ? ? ? ? ? ? ? "%u", xen_blkif_max_ring_order); > + ? ? ? if (err) > + ? ? ? ? ? ? ? goto fail; > + > ? ? ? ?err = xenbus_switch_state(dev, XenbusStateInitWait); > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto fail; > @@ -744,22 +753,80 @@ again: > ?static int connect_ring(struct backend_info *be) > ?{ > ? ? ? ?struct xenbus_device *dev = be->dev; > - ? ? ? unsigned long ring_ref; > + ? ? ? int ring_ref[XENBUS_MAX_RING_PAGES]; > + ? ? ? unsigned int ring_order; > ? ? ? ?unsigned int evtchn; > ? ? ? ?char protocol[64] = ""; > ? ? ? ?int err; > > ? ? ? 
?DPRINTK("%s", dev->otherend); > > - ? ? ? err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", > - ? ? ? ? ? ? ? ? ? ? ? ? ? &ring_ref, "event-channel", "%u", &evtchn, NULL); > - ? ? ? if (err) { > - ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"reading %s/ring-ref and event-channel", > + ? ? ? err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", > + ? ? ? ? ? ? ? ? ? ? ? ? ?&evtchn); > + ? ? ? if (err != 1) { > + ? ? ? ? ? ? ? err = -EINVAL; > + > + ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, "reading %s/event-channel", > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? dev->otherend); > ? ? ? ? ? ? ? ?return err; > ? ? ? ?} > > + ? ? ? printk(KERN_INFO "blkback: event-channel %u\n", evtchn); > + > + ? ? ? err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", > + ? ? ? ? ? ? ? ? ? ? ? ? ?&ring_order); > + ? ? ? if (err != 1) { > + ? ? ? ? ? ? ? DPRINTK("%s: using single page handshake", dev->otherend); > + > + ? ? ? ? ? ? ? ring_order = 0; > + > + ? ? ? ? ? ? ? err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"%d", &ring_ref[0]); > + ? ? ? ? ? ? ? if (err != 1) { > + ? ? ? ? ? ? ? ? ? ? ? err = -EINVAL; > + > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, "reading %s/ring-ref", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dev->otherend); > + ? ? ? ? ? ? ? ? ? ? ? return err; > + ? ? ? ? ? ? ? } > + > + ? ? ? ? ? ? ? printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]); > + ? ? ? } else { > + ? ? ? ? ? ? ? unsigned int i; > + > + ? ? ? ? ? ? ? if (ring_order > xen_blkif_max_ring_order) { > + ? ? ? ? ? ? ? ? ? ? ? err = -EINVAL; > + > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"%s/ring-page-order too big", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dev->otherend); > + ? ? ? ? ? ? ? ? ? ? ? return err; > + ? ? ? ? ? ? ? } > + > + ? ? ? ? ? ? ? for (i = 0; i < (1u << ring_order); i++) { > + ? ? ? ? ? ? ? ? ? ? ? 
char ring_ref_name[10]; > + > + ? ? ? ? ? ? ? ? ? ? ? snprintf(ring_ref_name, sizeof(ring_ref_name), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"ring-ref%u", i); > + > + ? ? ? ? ? ? ? ? ? ? ? err = xenbus_scanf(XBT_NIL, dev->otherend, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ring_ref_name, "%d", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&ring_ref[i]); > + ? ? ? ? ? ? ? ? ? ? ? if (err != 1) { > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? err = -EINVAL; > + > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"reading %s/%s", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dev->otherend, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ring_ref_name); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? return err; > + ? ? ? ? ? ? ? ? ? ? ? } > + > + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_INFO "blkback: ring-ref%u %d\n", i, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ring_ref[i]); > + ? ? ? ? ? ? ? } > + ? ? ? } > + > ? ? ? ?be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; > ? ? ? ?err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", > ? ? ? ? ? ? ? ? ? ? ? ? ? ?"%63s", protocol, NULL); > @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be) > ? ? ? ? ? ? ? ?xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); > ? ? ? ? ? ? ? ?return -1; > ? ? ? ?} > - ? ? ? pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", > - ? ? ? ? ? ? ? ring_ref, evtchn, be->blkif->blk_protocol, protocol); > > ? ? ? ?/* Map the shared frame, irq etc. */ > - ? ? ? err = xen_blkif_map(be->blkif, ring_ref, evtchn); > + ? ? ? err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn); > ? ? ? ?if (err) { > - ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ring_ref, evtchn); > + ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn"); > ? ? ? ? ? ? ? ?return err; > ? ? ? 
?} > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 2f22874..485813a 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -57,6 +57,10 @@ > > ?#include <asm/xen/hypervisor.h> > > +static int xen_blkif_ring_order; > +module_param_named(reqs, xen_blkif_ring_order, int, 0); > +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages."); > + > ?enum blkif_state { > ? ? ? ?BLKIF_STATE_DISCONNECTED, > ? ? ? ?BLKIF_STATE_CONNECTED, > @@ -72,7 +76,8 @@ struct blk_shadow { > ?static DEFINE_MUTEX(blkfront_mutex); > ?static const struct block_device_operations xlvbd_block_fops; > > -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) > +#define BLK_RING_SIZE(_order) ?__CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > +#define BLK_MAX_RING_SIZE ? ? ?BLK_RING_SIZE(XENBUS_MAX_RING_ORDER) > > ?/* > ?* We have one of these per vbd, whether ide, scsi or 'other'. ?They > @@ -87,14 +92,15 @@ struct blkfront_info > ? ? ? ?int vdevice; > ? ? ? ?blkif_vdev_t handle; > ? ? ? ?enum blkif_state connected; > - ? ? ? int ring_ref; > + ? ? ? int ring_ref[XENBUS_MAX_RING_PAGES]; > + ? ? ? int ring_order; > ? ? ? ?struct blkif_front_ring ring; > ? ? ? ?struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > ? ? ? ?unsigned int evtchn, irq; > ? ? ? ?struct request_queue *rq; > ? ? ? ?struct work_struct work; > ? ? ? ?struct gnttab_free_callback callback; > - ? ? ? struct blk_shadow shadow[BLK_RING_SIZE]; > + ? ? ? struct blk_shadow shadow[BLK_MAX_RING_SIZE]; > ? ? ? ?unsigned long shadow_free; > ? ? ? ?unsigned int feature_flush; > ? ? ? ?unsigned int flush_op; > @@ -111,9 +117,7 @@ static unsigned int nr_minors; > ?static unsigned long *minors; > ?static DEFINE_SPINLOCK(minor_lock); > > -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ > - ? ? ? (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) > -#define GRANT_INVALID_REF ? ? ?0 > +#define GRANT_INVALID_REF ? ? ?0 > > ?#define PARTS_PER_DISK ? ? ? ? 
16 > ?#define PARTS_PER_EXT_DISK ? ? ?256 > @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); > ?static int get_id_from_freelist(struct blkfront_info *info) > ?{ > ? ? ? ?unsigned long free = info->shadow_free; > - ? ? ? BUG_ON(free >= BLK_RING_SIZE); > + ? ? ? BUG_ON(free >= BLK_MAX_RING_SIZE); > ? ? ? ?info->shadow_free = info->shadow[free].req.u.rw.id; > ? ? ? ?info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ > ? ? ? ?return free; > @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work) > > ?static void blkif_free(struct blkfront_info *info, int suspend) > ?{ > + ? ? ? int i; > + > ? ? ? ?/* Prevent new requests being issued until we fix things up. */ > ? ? ? ?spin_lock_irq(&blkif_io_lock); > ? ? ? ?info->connected = suspend ? > @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) > ? ? ? ?flush_work_sync(&info->work); > > ? ? ? ?/* Free resources associated with old device channel. */ > - ? ? ? if (info->ring_ref != GRANT_INVALID_REF) { > - ? ? ? ? ? ? ? gnttab_end_foreign_access(info->ring_ref, 0, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (unsigned long)info->ring.sring); > - ? ? ? ? ? ? ? info->ring_ref = GRANT_INVALID_REF; > - ? ? ? ? ? ? ? info->ring.sring = NULL; > + ? ? ? for (i = 0; i < (1 << info->ring_order); i++) { > + ? ? ? ? ? ? ? if (info->ring_ref[i] != GRANT_INVALID_REF) { > + ? ? ? ? ? ? ? ? ? ? ? gnttab_end_foreign_access(info->ring_ref[i], 0, 0); > + ? ? ? ? ? ? ? ? ? ? ? info->ring_ref[i] = GRANT_INVALID_REF; > + ? ? ? ? ? ? ? } > ? ? ? ?} > + > + ? ? ? free_pages((unsigned long)info->ring.sring, info->ring_order); > + ? ? ? info->ring.sring = NULL; > + > ? ? ? ?if (info->irq) > ? ? ? ? ? ? ? ?unbind_from_irqhandler(info->irq, info); > ? ? ? ?info->evtchn = info->irq = 0; > - > ?} > > ?static void blkif_completion(struct blk_shadow *s) > @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev, > ? ? ? ?struct blkif_sring *sring; > ? ? ? ?int err; > > - ? 
? ? info->ring_ref = GRANT_INVALID_REF; > - > - ? ? ? sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); > + ? ? ? sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?info->ring_order); > ? ? ? ?if (!sring) { > ? ? ? ? ? ? ? ?xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); > ? ? ? ? ? ? ? ?return -ENOMEM; > ? ? ? ?} > ? ? ? ?SHARED_RING_INIT(sring); > - ? ? ? FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); > + ? ? ? FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order); > > ? ? ? ?sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > - ? ? ? err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); > + ? ? ? err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? info->ring_ref); > ? ? ? ?if (err < 0) { > - ? ? ? ? ? ? ? free_page((unsigned long)sring); > + ? ? ? ? ? ? ? free_pages((unsigned long)sring, info->ring_order); > ? ? ? ? ? ? ? ?info->ring.sring = NULL; > ? ? ? ? ? ? ? ?goto fail; > ? ? ? ?} > - ? ? ? info->ring_ref = err; > > ? ? ? ?err = xenbus_alloc_evtchn(dev, &info->evtchn); > ? ? ? ?if (err) > @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, > ?{ > ? ? ? ?const char *message = NULL; > ? ? ? ?struct xenbus_transaction xbt; > + ? ? ? unsigned int ring_order; > + ? ? ? int legacy_backend; > + ? ? ? int i; > ? ? ? ?int err; > > + ? ? ? for (i = 0; i < (1 << info->ring_order); i++) > + ? ? ? ? ? ? ? info->ring_ref[i] = GRANT_INVALID_REF; > + > + ? ? ? err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", > + ? ? ? ? ? ? ? ? ? ? ? ? ?&ring_order); > + > + ? ? ? legacy_backend = !(err == 1); > + > + ? ? ? if (legacy_backend) { > + ? ? ? ? ? ? ? info->ring_order = 0; > + ? ? ? } else { > + ? ? ? ? ? ? ? info->ring_order = (ring_order <= xen_blkif_ring_order) ? > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ring_order : > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
? ?xen_blkif_ring_order; > + ? ? ? } > + > ? ? ? ?/* Create shared ring, alloc event channel. */ > ? ? ? ?err = setup_blkring(dev, info); > ? ? ? ?if (err) > @@ -889,12 +916,35 @@ again: > ? ? ? ? ? ? ? ?goto destroy_blkring; > ? ? ? ?} > > - ? ? ? err = xenbus_printf(xbt, dev->nodename, > - ? ? ? ? ? ? ? ? ? ? ? ? ? "ring-ref", "%u", info->ring_ref); > - ? ? ? if (err) { > - ? ? ? ? ? ? ? message = "writing ring-ref"; > - ? ? ? ? ? ? ? goto abort_transaction; > + ? ? ? if (legacy_backend) { > + ? ? ? ? ? ? ? err = xenbus_printf(xbt, dev->nodename, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "ring-ref", "%d", info->ring_ref[0]); > + ? ? ? ? ? ? ? if (err) { > + ? ? ? ? ? ? ? ? ? ? ? message = "writing ring-ref"; > + ? ? ? ? ? ? ? ? ? ? ? goto abort_transaction; > + ? ? ? ? ? ? ? } > + ? ? ? } else { > + ? ? ? ? ? ? ? for (i = 0; i < (1 << info->ring_order); i++) { > + ? ? ? ? ? ? ? ? ? ? ? char key[sizeof("ring-ref") + 2]; > + > + ? ? ? ? ? ? ? ? ? ? ? sprintf(key, "ring-ref%d", i); > + > + ? ? ? ? ? ? ? ? ? ? ? err = xenbus_printf(xbt, dev->nodename, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? key, "%d", info->ring_ref[i]); > + ? ? ? ? ? ? ? ? ? ? ? if (err) { > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? message = "writing ring-ref"; > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto abort_transaction; > + ? ? ? ? ? ? ? ? ? ? ? } > + ? ? ? ? ? ? ? } > + > + ? ? ? ? ? ? ? err = xenbus_printf(xbt, dev->nodename, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "ring-page-order", "%u", info->ring_order); > + ? ? ? ? ? ? ? if (err) { > + ? ? ? ? ? ? ? ? ? ? ? message = "writing ring-order"; > + ? ? ? ? ? ? ? ? ? ? ? goto abort_transaction; > + ? ? ? ? ? ? ? } > ? ? ? ?} > + > ? ? ? ?err = xenbus_printf(xbt, dev->nodename, > ? ? ? ? ? ? ? ? ? ? ? ? ? ?"event-channel", "%u", info->evtchn); > ? ? ? ?if (err) { > @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, > ? ? ? ?info->connected = BLKIF_STATE_DISCONNECTED; > ? ? ? ?INIT_WORK(&info->work, blkif_restart_queue); > > - ? ? ? 
for (i = 0; i < BLK_RING_SIZE; i++) > + ? ? ? for (i = 0; i < BLK_MAX_RING_SIZE; i++) > ? ? ? ? ? ? ? ?info->shadow[i].req.u.rw.id = i+1; > - ? ? ? info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + ? ? ? info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > ? ? ? ?/* Front end dir is a number, which is used as the id. */ > ? ? ? ?info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); > ? ? ? ?dev_set_drvdata(&dev->dev, info); > > - ? ? ? err = talk_to_blkback(dev, info); > - ? ? ? if (err) { > - ? ? ? ? ? ? ? kfree(info); > - ? ? ? ? ? ? ? dev_set_drvdata(&dev->dev, NULL); > - ? ? ? ? ? ? ? return err; > - ? ? ? } > - > ? ? ? ?return 0; > ?} > > @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info) > > ? ? ? ?/* Stage 2: Set up free list. */ > ? ? ? ?memset(&info->shadow, 0, sizeof(info->shadow)); > - ? ? ? for (i = 0; i < BLK_RING_SIZE; i++) > + ? ? ? for (i = 0; i < BLK_MAX_RING_SIZE; i++) > ? ? ? ? ? ? ? ?info->shadow[i].req.u.rw.id = i+1; > ? ? ? ?info->shadow_free = info->ring.req_prod_pvt; > - ? ? ? info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + ? ? ? info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > ? ? ? ?/* Stage 3: Find pending requests and requeue them. */ > - ? ? ? for (i = 0; i < BLK_RING_SIZE; i++) { > + ? ? ? for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) { > ? ? ? ? ? ? ? ?/* Not in use? */ > ? ? ? ? ? ? ? ?if (!copy[i].request) > ? ? ? ? ? ? ? ? ? ? ? ?continue; > @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev, > > ? ? ? ?switch (backend_state) { > ? ? ? ?case XenbusStateInitialising: > - ? ? ? case XenbusStateInitWait: > ? ? ? ?case XenbusStateInitialised: > ? ? ? ?case XenbusStateReconfiguring: > ? ? ? ?case XenbusStateReconfigured: > @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, > ? ? ? ?case XenbusStateClosed: > ? ? ? ? ? ? ? ?break; > > + ? ? ? case XenbusStateInitWait: > + ? ? ? ? ? ? ? 
talk_to_blkback(dev, info); > + ? ? ? ? ? ? ? break; > + > ? ? ? ?case XenbusStateConnected: > ? ? ? ? ? ? ? ?blkfront_connect(info); > ? ? ? ? ? ? ? ?break; > diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h > index 94b79c3..f93b59a 100644 > --- a/drivers/net/xen-netback/common.h > +++ b/drivers/net/xen-netback/common.h > @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif); > ?/* (Un)Map communication rings. */ > ?void xen_netbk_unmap_frontend_rings(struct xenvif *vif); > ?int xen_netbk_map_frontend_rings(struct xenvif *vif, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?grant_ref_t tx_ring_ref, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?grant_ref_t rx_ring_ref); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int tx_ring_ref, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int rx_ring_ref); > > ?/* (De)Register a xenvif with the netback backend. */ > ?void xen_netbk_add_xenvif(struct xenvif *vif); > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > index 59effac..0b014cf 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif) > ?} > > ?int xen_netbk_map_frontend_rings(struct xenvif *vif, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?grant_ref_t tx_ring_ref, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?grant_ref_t rx_ring_ref) > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int tx_ring_ref, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int rx_ring_ref) > ?{ > ? ? ? ?void *addr; > ? ? ? ?struct xen_netif_tx_sring *txs; > @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, > ? ? ? ?int err = -ENOMEM; > > ? ? ? ?err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tx_ring_ref, &addr); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&tx_ring_ref, 1, &addr); > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto err; > > @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, > ? ? ? 
?BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); > > ? ? ? ?err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?rx_ring_ref, &addr); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&rx_ring_ref, 1, &addr); > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto err; > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > index 698b905..521a595 100644 > --- a/drivers/net/xen-netfront.c > +++ b/drivers/net/xen-netfront.c > @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) > ? ? ? ?SHARED_RING_INIT(txs); > ? ? ? ?FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); > > - ? ? ? err = xenbus_grant_ring(dev, virt_to_mfn(txs)); > + ? ? ? err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref); > ? ? ? ?if (err < 0) { > ? ? ? ? ? ? ? ?free_page((unsigned long)txs); > ? ? ? ? ? ? ? ?goto fail; > ? ? ? ?} > > - ? ? ? info->tx_ring_ref = err; > ? ? ? ?rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); > ? ? ? ?if (!rxs) { > ? ? ? ? ? ? ? ?err = -ENOMEM; > @@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) > ? ? ? ?SHARED_RING_INIT(rxs); > ? ? ? ?FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); > > - ? ? ? err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); > + ? ? ? err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref); > ? ? ? ?if (err < 0) { > ? ? ? ? ? ? ? ?free_page((unsigned long)rxs); > ? ? ? ? ? ? ? ?goto fail; > ? ? ? ?} > - ? ? ? info->rx_ring_ref = err; > > ? ? ? ?err = xenbus_alloc_evtchn(dev, &info->evtchn); > ? ? ? ?if (err) > diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c > index 1620088..95109d8 100644 > --- a/drivers/pci/xen-pcifront.c > +++ b/drivers/pci/xen-pcifront.c > @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev) > ? ? ? ?int err = 0; > ? ? ? ?struct xenbus_transaction trans; > > - ? ? ? 
err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); > + ? ? ? err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref); > ? ? ? ?if (err < 0) > ? ? ? ? ? ? ? ?goto out; > > - ? ? ? pdev->gnt_ref = err; > - > ? ? ? ?err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto out; > diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c > index 64b11f9..e0834cd 100644 > --- a/drivers/xen/xen-pciback/xenbus.c > +++ b/drivers/xen/xen-pciback/xenbus.c > @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, > ? ? ? ? ? ? ? ?"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", > ? ? ? ? ? ? ? ?gnt_ref, remote_evtchn); > > - ? ? ? err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); > + ? ? ? err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); > ? ? ? ?if (err < 0) { > ? ? ? ? ? ? ? ?xenbus_dev_fatal(pdev->xdev, err, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"Error mapping other domain page in ours."); > diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c > index 566d2ad..3a14524 100644 > --- a/drivers/xen/xenbus/xenbus_client.c > +++ b/drivers/xen/xenbus/xenbus_client.c > @@ -53,14 +53,16 @@ struct xenbus_map_node { > ? ? ? ? ? ? ? ?struct vm_struct *area; /* PV */ > ? ? ? ? ? ? ? ?struct page *page; ? ? /* HVM */ > ? ? ? ?}; > - ? ? ? grant_handle_t handle; > + ? ? ? grant_handle_t handle[XENBUS_MAX_RING_PAGES]; > + ? ? ? unsigned int ? nr_handles; > ?}; > > ?static DEFINE_SPINLOCK(xenbus_valloc_lock); > ?static LIST_HEAD(xenbus_valloc_pages); > > ?struct xenbus_ring_ops { > - ? ? ? int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); > + ? ? ? int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts, > + ? ? ? ? ? ? ? ? ?void **vaddr); > ? ? ? 
?int (*unmap)(struct xenbus_device *dev, void *vaddr); > ?}; > > @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, > ?/** > ?* xenbus_grant_ring > ?* @dev: xenbus device > - * @ring_mfn: mfn of ring to grant > - > - * Grant access to the given @ring_mfn to the peer of the given device. ?Return > - * 0 on success, or -errno on error. ?On error, the device will switch to > - * XenbusStateClosing, and the error will be saved in the store. > + * @vaddr: starting virtual address of the ring > + * @nr_pages: number of page to be granted > + * @grefs: grant reference array to be filled in > + * Grant access to the given @vaddr to the peer of the given device. > + * Then fill in @grefs with grant references. ?Return 0 on success, or > + * -errno on error. ?On error, the device will switch to > + * XenbusStateClosing, and the first error will be saved in the store. > ?*/ > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, > + ? ? ? ? ? ? ? ? ? ? int nr_pages, int grefs[]) > ?{ > - ? ? ? int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); > - ? ? ? if (err < 0) > - ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, "granting access to ring page"); > + ? ? ? int i; > + ? ? ? int err; > + > + ? ? ? for (i = 0; i < nr_pages; i++) { > + ? ? ? ? ? ? ? unsigned long addr = (unsigned long)vaddr + > + ? ? ? ? ? ? ? ? ? ? ? (PAGE_SIZE * i); > + ? ? ? ? ? ? ? err = gnttab_grant_foreign_access(dev->otherend_id, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? virt_to_mfn(addr), 0); > + ? ? ? ? ? ? ? if (err < 0) { > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"granting access to ring page"); > + ? ? ? ? ? ? ? ? ? ? ? goto fail; > + ? ? ? ? ? ? ? } > + ? ? ? ? ? ? ? grefs[i] = err; > + ? ? ? } > + > + ? ? ? return 0; > + > +fail: > + ? ? ? for ( ; i >= 0; i--) > + ? ? ? ? ? ? ? 
gnttab_end_foreign_access_ref(grefs[i], 0); > ? ? ? ?return err; > ?} > ?EXPORT_SYMBOL_GPL(xenbus_grant_ring); > @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); > ?/** > ?* xenbus_map_ring_valloc > ?* @dev: xenbus device > - * @gnt_ref: grant reference > + * @gnt_ref: grant reference array > + * @nr_grefs: number of grant reference > ?* @vaddr: pointer to address to be filled out by mapping > ?* > ?* Based on Rusty Russell's skeleton driver's map_page. > @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); > ?* or -ENOMEM on error. If an error is returned, device will switch to > ?* XenbusStateClosing and the error message will be saved in XenStore. > ?*/ > -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) > +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], > + ? ? ? ? ? ? ? ? ? ? ? ? ?int nr_grefs, void **vaddr) > ?{ > - ? ? ? return ring_ops->map(dev, gnt_ref, vaddr); > + ? ? ? return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr); > ?} > ?EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); > > +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct xenbus_map_node *node); > + > ?static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int gnt_ref, void **vaddr) > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int gnt_ref[], int nr_grefs, void **vaddr) > ?{ > - ? ? ? struct gnttab_map_grant_ref op = { > - ? ? ? ? ? ? ? .flags = GNTMAP_host_map | GNTMAP_contains_pte, > - ? ? ? ? ? ? ? .ref ? = gnt_ref, > - ? ? ? ? ? ? ? .dom ? = dev->otherend_id, > - ? ? ? }; > + ? ? ? struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; > ? ? ? ?struct xenbus_map_node *node; > ? ? ? ?struct vm_struct *area; > - ? ? ? pte_t *pte; > + ? ? ? pte_t *pte[XENBUS_MAX_RING_PAGES]; > + ? ? ? int i; > + ? ? ? int err = 0; > + > + ? ? ? if (nr_grefs > XENBUS_MAX_RING_PAGES) > + ? ? ? ? ? ? ? return -EINVAL; > > ? ? ? 
?*vaddr = NULL; > > @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > ? ? ? ?if (!node) > ? ? ? ? ? ? ? ?return -ENOMEM; > > - ? ? ? area = alloc_vm_area(PAGE_SIZE, &pte); > + ? ? ? area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte); > ? ? ? ?if (!area) { > ? ? ? ? ? ? ? ?kfree(node); > ? ? ? ? ? ? ? ?return -ENOMEM; > ? ? ? ?} > > - ? ? ? op.host_addr = arbitrary_virt_to_machine(pte).maddr; > + ? ? ? for (i = 0; i < nr_grefs; i++) { > + ? ? ? ? ? ? ? op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte, > + ? ? ? ? ? ? ? op[i].ref ? = gnt_ref[i], > + ? ? ? ? ? ? ? op[i].dom ? = dev->otherend_id, > + ? ? ? ? ? ? ? op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr; > + ? ? ? }; > > ? ? ? ?if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) > ? ? ? ? ? ? ? ?BUG(); > > - ? ? ? if (op.status != GNTST_okay) { > - ? ? ? ? ? ? ? free_vm_area(area); > - ? ? ? ? ? ? ? kfree(node); > - ? ? ? ? ? ? ? xenbus_dev_fatal(dev, op.status, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"mapping in shared page %d from domain %d", > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?gnt_ref, dev->otherend_id); > - ? ? ? ? ? ? ? return op.status; > + ? ? ? node->nr_handles = nr_grefs; > + ? ? ? node->area = area; > + > + ? ? ? for (i = 0; i < nr_grefs; i++) { > + ? ? ? ? ? ? ? if (op[i].status != GNTST_okay) { > + ? ? ? ? ? ? ? ? ? ? ? err = op[i].status; > + ? ? ? ? ? ? ? ? ? ? ? node->handle[i] = INVALID_GRANT_HANDLE; > + ? ? ? ? ? ? ? ? ? ? ? continue; > + ? ? ? ? ? ? ? } > + ? ? ? ? ? ? ? node->handle[i] = op[i].handle; > ? ? ? ?} > > - ? ? ? node->handle = op.handle; > - ? ? ? node->area = area; > + ? ? ? if (err != 0) { > + ? ? ? ? ? ? ? for (i = 0; i < nr_grefs; i++) > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_fatal(dev, op[i].status, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "mapping in shared page %d from domain %d", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? gnt_ref[i], dev->otherend_id); > + > + ? ? ? ? ? ? ? ?__xenbus_unmap_ring_vfree_pv(dev, node); > + > + ? ? ? ? ? ? ? 
return err; > + ? ? ? } > > ? ? ? ?spin_lock(&xenbus_valloc_lock); > ? ? ? ?list_add(&node->next, &xenbus_valloc_pages); > @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > ?} > > ?static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int gnt_ref, void **vaddr) > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int gnt_ref[], int nr_grefs, void **vaddr) > ?{ > ? ? ? ?struct xenbus_map_node *node; > ? ? ? ?int err; > ? ? ? ?void *addr; > > + ? ? ? if (nr_grefs > XENBUS_MAX_RING_PAGES) > + ? ? ? ? ? ? ? return -EINVAL; > + > ? ? ? ?*vaddr = NULL; > > ? ? ? ?node = kzalloc(sizeof(*node), GFP_KERNEL); > ? ? ? ?if (!node) > ? ? ? ? ? ? ? ?return -ENOMEM; > > - ? ? ? err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); > + ? ? ? err = alloc_xenballooned_pages(nr_grefs, &node->page, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?false /* lowmem */); > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto out_err; > > ? ? ? ?addr = pfn_to_kaddr(page_to_pfn(node->page)); > > - ? ? ? err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); > + ? ? ? err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr); > ? ? ? ?if (err) > ? ? ? ? ? ? ? ?goto out_err; > > @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > ? ? ? ?return 0; > > ?out_err: > - ? ? ? free_xenballooned_pages(1, &node->page); > + ? ? ? free_xenballooned_pages(nr_grefs, &node->page); > ? ? ? ?kfree(node); > ? ? ? 
?return err; > ?} > @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > ?/** > ?* xenbus_map_ring > ?* @dev: xenbus device > - * @gnt_ref: grant reference > - * @handle: pointer to grant handle to be filled > + * @gnt_ref: grant reference array > + * @nr_grefs: number of grant references > + * @handle: pointer to grant handle array to be filled, mind the size > ?* @vaddr: address to be mapped to > ?* > - * Map a page of memory into this domain from another domain's grant table. > + * Map pages of memory into this domain from another domain's grant table. > ?* xenbus_map_ring does not allocate the virtual address space (you must do > - * this yourself!). It only maps in the page to the specified address. > + * this yourself!). It only maps in the pages to the specified address. > ?* Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) > ?* or -ENOMEM on error. If an error is returned, device will switch to > - * XenbusStateClosing and the error message will be saved in XenStore. > + * XenbusStateClosing and the last error message will be saved in XenStore. > ?*/ > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - ? ? ? ? ? ? ? ? ? grant_handle_t *handle, void *vaddr) > +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, > + ? ? ? ? ? ? ? ? ? grant_handle_t handle[], void *vaddr) > ?{ > - ? ? ? struct gnttab_map_grant_ref op; > - > - ? ? ? gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, > - ? ? ? ? ? ? ? ? ? ? ? ? dev->otherend_id); > + ? ? ? struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; > + ? ? ? int i; > + ? ? ? int err = GNTST_okay; ? /* 0 */ > + > + ? ? ? for (i = 0; i < nr_grefs; i++) { > + ? ? ? ? ? ? ? unsigned long addr = (unsigned long)vaddr + > + ? ? ? ? ? ? ? ? ? ? ? (PAGE_SIZE * i); > + ? ? ? ? ? ? ? gnttab_set_map_op(&op[i], (phys_addr_t)addr, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GNTMAP_host_map, gnt_ref[i], > + ? ? ? ? ? ? ? ? 
? ? ? ? ? ? ? ? dev->otherend_id); > + ? ? ? } > > - ? ? ? if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) > + ? ? ? if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs)) > ? ? ? ? ? ? ? ?BUG(); > > - ? ? ? if (op.status != GNTST_okay) { > - ? ? ? ? ? ? ? xenbus_dev_fatal(dev, op.status, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"mapping in shared page %d from domain %d", > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?gnt_ref, dev->otherend_id); > - ? ? ? } else > - ? ? ? ? ? ? ? *handle = op.handle; > + ? ? ? for (i = 0; i < nr_grefs; i++) { > + ? ? ? ? ? ? ? if (op[i].status != GNTST_okay) { > + ? ? ? ? ? ? ? ? ? ? ? err = op[i].status; > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_fatal(dev, err, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "mapping in shared page %d from domain %d", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? gnt_ref[i], dev->otherend_id); > + ? ? ? ? ? ? ? ? ? ? ? handle[i] = INVALID_GRANT_HANDLE; > + ? ? ? ? ? ? ? } else > + ? ? ? ? ? ? ? ? ? ? ? handle[i] = op[i].handle; > + ? ? ? } > > - ? ? ? return op.status; > + ? ? ? if (err != GNTST_okay) > + ? ? ? ? ? ? ? xenbus_unmap_ring(dev, handle, nr_grefs, vaddr); > + > + ? ? ? return err; > ?} > ?EXPORT_SYMBOL_GPL(xenbus_map_ring); > > @@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) > ?} > ?EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); > > +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct xenbus_map_node *node) > +{ > + ? ? ? struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; > + ? ? ? unsigned int level; > + ? ? ? int i, j; > + ? ? ? int err = GNTST_okay; > + > + ? ? ? j = 0; > + ? ? ? for (i = 0; i < node->nr_handles; i++) { > + ? ? ? ? ? ? ? unsigned long vaddr = (unsigned long)node->area->addr + > + ? ? ? ? ? ? ? ? ? ? ? (PAGE_SIZE * i); > + ? ? ? ? ? ? ? if (node->handle[i] != INVALID_GRANT_HANDLE) { > + ? ? ? ? ? ? ? ? ? ? ? memset(&op[j], 0, sizeof(op[0])); > + ? ? ? ? ? ? ? ? ? ? ? 
op[j].host_addr = arbitrary_virt_to_machine( > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? lookup_address(vaddr, &level)).maddr; > + ? ? ? ? ? ? ? ? ? ? ? op[j].handle = node->handle[i]; > + ? ? ? ? ? ? ? ? ? ? ? j++; > + ? ? ? ? ? ? ? ? ? ? ? node->handle[i] = INVALID_GRANT_HANDLE; > + ? ? ? ? ? ? ? } > + ? ? ? } > + > + ? ? ? if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) > + ? ? ? ? ? ? ? BUG(); > + > + ? ? ? node->nr_handles = 0; > + > + ? ? ? for (i = 0; i < j; i++) { > + ? ? ? ? ? ? ? if (op[i].status != GNTST_okay) { > + ? ? ? ? ? ? ? ? ? ? ? err = op[i].status; > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_error(dev, err, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "unmapping page %d at handle %d error %d", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? i, op[i].handle, err); > + ? ? ? ? ? ? ? } > + ? ? ? } > + > + ? ? ? if (err == GNTST_okay) > + ? ? ? ? ? ? ? free_vm_area(node->area); > + > + ? ? ? kfree(node); > + > + ? ? ? return err; > +} > + > ?static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) > ?{ > ? ? ? ?struct xenbus_map_node *node; > - ? ? ? struct gnttab_unmap_grant_ref op = { > - ? ? ? ? ? ? ? .host_addr = (unsigned long)vaddr, > - ? ? ? }; > - ? ? ? unsigned int level; > > ? ? ? ?spin_lock(&xenbus_valloc_lock); > ? ? ? ?list_for_each_entry(node, &xenbus_valloc_pages, next) { > @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) > > ? ? ? ?if (!node) { > ? ? ? ? ? ? ? ?xenbus_dev_error(dev, -ENOENT, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"can't find mapped virtual address %p", vaddr); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "can't find mapped virtual address %p", vaddr); > ? ? ? ? ? ? ? ?return GNTST_bad_virt_addr; > ? ? ? ?} > > - ? ? ? op.handle = node->handle; > - ? ? ? op.host_addr = arbitrary_virt_to_machine( > - ? ? ? ? ? ? ? lookup_address((unsigned long)vaddr, &level)).maddr; > - > - ? ? ? if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) > - ? ? ? ? ? ? ? BUG(); > - > - ? ? 
? if (op.status == GNTST_okay) > - ? ? ? ? ? ? ? free_vm_area(node->area); > - ? ? ? else > - ? ? ? ? ? ? ? xenbus_dev_error(dev, op.status, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"unmapping page at handle %d error %d", > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?node->handle, op.status); > - > - ? ? ? kfree(node); > - ? ? ? return op.status; > + ? ? ? return __xenbus_unmap_ring_vfree_pv(dev, node); > ?} > > ?static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > ?{ > ? ? ? ?int rv; > ? ? ? ?struct xenbus_map_node *node; > - ? ? ? void *addr; > + ? ? ? void *addr = NULL; > > ? ? ? ?spin_lock(&xenbus_valloc_lock); > ? ? ? ?list_for_each_entry(node, &xenbus_valloc_pages, next) { > @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > > ? ? ? ?if (!node) { > ? ? ? ? ? ? ? ?xenbus_dev_error(dev, -ENOENT, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"can't find mapped virtual address %p", vaddr); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "can't find mapped virtual address %p", vaddr); > ? ? ? ? ? ? ? ?return GNTST_bad_virt_addr; > ? ? ? ?} > > - ? ? ? rv = xenbus_unmap_ring(dev, node->handle, addr); > + ? ? ? rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr); > > ? ? ? ?if (!rv) > - ? ? ? ? ? ? ? free_xenballooned_pages(1, &node->page); > + ? ? ? ? ? ? ? free_xenballooned_pages(node->nr_handles, &node->page); > ? ? ? ?else > ? ? ? ? ? ? ? ?WARN(1, "Leaking %p\n", vaddr); > > @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > ?* xenbus_unmap_ring > ?* @dev: xenbus device > ?* @handle: grant handle > + * @nr_handles: number of grant handle > ?* @vaddr: addr to unmap > ?* > ?* Unmap a page of memory in this domain that was imported from another domain. > @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > ?* (see xen/include/interface/grant_table.h). > ?*/ > ?int xenbus_unmap_ring(struct xenbus_device *dev, > - ? ? 
? ? ? ? ? ? ? ? grant_handle_t handle, void *vaddr) > + ? ? ? ? ? ? ? ? ? ? ? grant_handle_t handle[], int nr_handles, > + ? ? ? ? ? ? ? ? ? ? ? void *vaddr) > ?{ > - ? ? ? struct gnttab_unmap_grant_ref op; > - > - ? ? ? gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); > + ? ? ? struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; > + ? ? ? int i, j; > + ? ? ? int err = GNTST_okay; > + > + ? ? ? j = 0; > + ? ? ? for (i = 0; i < nr_handles; i++) { > + ? ? ? ? ? ? ? unsigned long addr = (unsigned long)vaddr + > + ? ? ? ? ? ? ? ? ? ? ? (PAGE_SIZE * i); > + ? ? ? ? ? ? ? if (handle[i] != INVALID_GRANT_HANDLE) { > + ? ? ? ? ? ? ? ? ? ? ? gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GNTMAP_host_map, handle[i]); > + ? ? ? ? ? ? ? ? ? ? ? handle[i] = INVALID_GRANT_HANDLE; > + ? ? ? ? ? ? ? } > + ? ? ? } > > - ? ? ? if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) > + ? ? ? if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) > ? ? ? ? ? ? ? ?BUG(); > > - ? ? ? if (op.status != GNTST_okay) > - ? ? ? ? ? ? ? xenbus_dev_error(dev, op.status, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"unmapping page at handle %d error %d", > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?handle, op.status); > + ? ? ? for (i = 0; i < j; i++) { > + ? ? ? ? ? ? ? if (op[i].status != GNTST_okay) { > + ? ? ? ? ? ? ? ? ? ? ? err = op[i].status; > + ? ? ? ? ? ? ? ? ? ? ? xenbus_dev_error(dev, err, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "unmapping page at handle %d error %d", > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? handle[i], err); > + ? ? ? ? ? ? ? } > + ? ? ? } > > - ? ? ? return op.status; > + ? ? ? return err; > ?} > ?EXPORT_SYMBOL_GPL(xenbus_unmap_ring); > > diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c > index 3864967..62b92d2 100644 > --- a/drivers/xen/xenbus/xenbus_probe.c > +++ b/drivers/xen/xenbus/xenbus_probe.c > @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void) > ? ? ? 
?return err; > ?} > > +extern void xenbus_ring_ops_init(void); > ?static int __init xenbus_init(void) > ?{ > ? ? ? ?int err = 0; > @@ -767,6 +768,8 @@ static int __init xenbus_init(void) > ? ? ? ?proc_mkdir("xen", NULL); > ?#endif > > + ? ? ? xenbus_ring_ops_init(); > + > ?out_error: > ? ? ? ?return err; > ?} > diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h > index e8c599b..cdbd948 100644 > --- a/include/xen/xenbus.h > +++ b/include/xen/xenbus.h > @@ -195,15 +195,23 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, > ? ? ? ? ? ? ? ? ? ? ? ? const char *pathfmt, ...); > > ?int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); > -int xenbus_map_ring_valloc(struct xenbus_device *dev, > - ? ? ? ? ? ? ? ? ? ? ? ? ?int gnt_ref, void **vaddr); > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - ? ? ? ? ? ? ? ? ? ? ? ? ?grant_handle_t *handle, void *vaddr); > + > +#define ? ? ? ?XENBUS_MAX_RING_ORDER ? 2 > +#define ? ? ? ?XENBUS_MAX_RING_PAGES ? (1 << XENBUS_MAX_RING_ORDER) > + > +#define INVALID_GRANT_HANDLE ? ? ? ? ? (~0U) > + > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, > + ? ? ? ? ? ? ? ? ? ? int nr_pages, int grefs[]); > +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], > + ? ? ? ? ? ? ? ? ? ? ? ? ?int nr_grefs, void **vaddr); > +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, > + ? ? ? ? ? ? ? ? ? grant_handle_t handle[], void *vaddr); > > ?int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); > ?int xenbus_unmap_ring(struct xenbus_device *dev, > - ? ? ? ? ? ? ? ? ? ? grant_handle_t handle, void *vaddr); > + ? ? ? ? ? ? ? ? ? ? grant_handle_t handle[], int nr_handles, > + ? ? ? ? ? ? ? ? ? ? 
void *vaddr); > > ?int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); > ?int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); > > _______________________________________________ > Xen-devel mailing list > Xen-devel at lists.xen.org > http://lists.xen.org/xen-devel >
Jan Beulich
2012-Mar-07 09:33 UTC
Re: [PATCH 0001/001] xen: multi page ring support for block devices
>>> On 06.03.12 at 18:20, Konrad Rzeszutek Wilk <konrad@darnok.org> wrote: > -> the usage of XenbusStateInitWait? Why do we introduce that? Looks > like a fix to something. No, this is required to get the negotiation working (the frontend must not try to read the new nodes until it can be certain that the backend populated them). However, as already pointed out in an earlier reply to Santosh, the way this is done here doesn't appear to allow for the backend to already be in InitWait state when the frontend gets invoked. > -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal > default size for SSD usage? 16? What do SSDs have to do with a XenBus definition? Imo it's wrong (and unnecessary) to introduce a limit at the XenBus level at all - each driver can do this for itself. As to the limit for SSDs in the block interface - I don't think the number of possibly simultaneous requests has anything to do with this. Instead, I'd expect the request number/size/segments extension that NetBSD apparently implements to possibly have an effect. Jan
Justin T. Gibbs
2012-Mar-14 17:17 UTC
Re: [PATCH 0001/001] xen: multi page ring support for block devices
On Mar 6, 2012, at 1:34 AM, Jan Beulich wrote: >>>> On 05.03.12 at 22:49, Santosh Jodh <Santosh.Jodh@citrix.com> wrote: … >> + } >> + >> /* Create shared ring, alloc event channel. */ >> err = setup_blkring(dev, info); >> if (err) >> @@ -889,12 +916,35 @@ again: >> goto destroy_blkring; >> } >> >> - err = xenbus_printf(xbt, dev->nodename, >> - "ring-ref", "%u", info->ring_ref); >> - if (err) { >> - message = "writing ring-ref"; >> - goto abort_transaction; >> + if (legacy_backend) { > > Why not use the simpler interface always when info->ring_order == 0? Because, as I just found out today via a FreeBSD bug report, that's not how XenServer works. If the front-end publishes "ring-page-order", the backend assumes the "ring-refNN" XenStore nodes are in effect, even if the order is 0. I'm working on a documentation update for blkif.h now. <sigh> -- Justin
Reasonably Related Threads
- [PATCH 0001/001] xen: multi page ring support for block devices
- [PATCH 0001/001] xen: multi page ring support for block devices
- xenpaging fixes for kernel and hypervisor
- [PATCH 0 of 2] Paging support updates for XCP dom0
- [PATCH] linux-2.6.18/backends: use xenbus_be.ko interfaces instead of open-coding them