Roger Pau Monne
2013-Mar-27 11:10 UTC
[PATCH v1 5/7] xen-blkback: make the queue of free requests per backend
Remove the last dependency from blkbk by moving the list of free
requests to blkif. This change reduces the contention on the list of
available requests.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: xen-devel@lists.xen.org
---
Changes since RFC:
 * Replace kzalloc with kcalloc.
---
 drivers/block/xen-blkback/blkback.c | 125 +++++++----------------------------
 drivers/block/xen-blkback/common.h  |  27 ++++++++
 drivers/block/xen-blkback/xenbus.c  |  19 +++++
 3 files changed, 70 insertions(+), 101 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index e6542d5..a1c9dad 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -50,18 +50,14 @@
 #include "common.h"
 
 /*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
- *
- * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
- *
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
+ * This is the number of requests that will be pre-allocated for each backend.
+ * For better performance this is set to RING_SIZE (32), so requests
+ * in the ring will never have to wait for a free pending_req.
  */
-static int xen_blkif_reqs = 64;
+
+int xen_blkif_reqs = 32;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
-MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate per backend");
 
 /*
  * Maximum number of unused free pages to keep in the internal buffer.
@@ -120,53 +116,11 @@ MODULE_PARM_DESC(lru_percent_clean,
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
 
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
- */
-struct pending_req {
-        struct xen_blkif *blkif;
-        u64 id;
-        int nr_pages;
-        atomic_t pendcnt;
-        unsigned short operation;
-        int status;
-        struct list_head free_list;
-        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-        struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-        grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
-
 #define BLKBACK_INVALID_HANDLE (~0)
 
 /* Number of free pages to remove on each call to free_xenballooned_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
-struct xen_blkbk {
-        struct pending_req *pending_reqs;
-        /* List of all 'pending_req' available */
-        struct list_head pending_free;
-        /* And its spinlock. */
-        spinlock_t pending_free_lock;
-        wait_queue_head_t pending_free_wq;
-};
-
-static struct xen_blkbk *blkbk;
-
-/*
- * Little helpful macro to figure out the index and virtual address of the
- * pending_pages[..]. For each 'pending_req' we have have up to
- * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
- * 10 and would index in the pending_pages[..].
- */
-static inline int vaddr_pagenr(struct pending_req *req, int seg)
-{
-        return (req - blkbk->pending_reqs) *
-                BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-}
-
 static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
         unsigned long flags;
@@ -471,18 +425,18 @@ finished:
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(void)
+static struct pending_req *alloc_req(struct xen_blkif *blkif)
 {
         struct pending_req *req = NULL;
         unsigned long flags;
 
-        spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-        if (!list_empty(&blkbk->pending_free)) {
-                req = list_entry(blkbk->pending_free.next, struct pending_req,
+        spin_lock_irqsave(&blkif->pending_free_lock, flags);
+        if (!list_empty(&blkif->pending_free)) {
+                req = list_entry(blkif->pending_free.next, struct pending_req,
                                  free_list);
                 list_del(&req->free_list);
         }
-        spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+        spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
         return req;
 }
 
@@ -490,17 +444,17 @@ static struct pending_req *alloc_req(void)
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct pending_req *req)
+static void free_req(struct xen_blkif *blkif, struct pending_req *req)
 {
         unsigned long flags;
         int was_empty;
 
-        spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-        was_empty = list_empty(&blkbk->pending_free);
-        list_add(&req->free_list, &blkbk->pending_free);
-        spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+        spin_lock_irqsave(&blkif->pending_free_lock, flags);
+        was_empty = list_empty(&blkif->pending_free);
+        list_add(&req->free_list, &blkif->pending_free);
+        spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
         if (was_empty)
-                wake_up(&blkbk->pending_free_wq);
+                wake_up(&blkif->pending_free_wq);
 }
 
 /*
@@ -635,8 +589,8 @@ int xen_blkif_schedule(void *arg)
                 if (timeout == 0)
                         goto purge_gnt_list;
                 timeout = wait_event_interruptible_timeout(
-                        blkbk->pending_free_wq,
-                        !list_empty(&blkbk->pending_free) ||
+                        blkif->pending_free_wq,
+                        !list_empty(&blkif->pending_free) ||
                         kthread_should_stop(),
                         timeout);
                 if (timeout == 0)
@@ -883,7 +837,7 @@ static int dispatch_other_io(struct xen_blkif *blkif,
                              struct blkif_request *req,
                              struct pending_req *pending_req)
 {
-        free_req(pending_req);
+        free_req(blkif, pending_req);
         make_response(blkif, req->u.other.id, req->operation,
                       BLKIF_RSP_EOPNOTSUPP);
         return -EIO;
@@ -943,7 +897,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                 if (atomic_read(&pending_req->blkif->drain))
                         complete(&pending_req->blkif->drain_complete);
                 }
-                free_req(pending_req);
+                free_req(pending_req->blkif, pending_req);
         }
 }
@@ -986,7 +940,7 @@ __do_block_io_op(struct xen_blkif *blkif)
                         break;
                 }
 
-                pending_req = alloc_req();
+                pending_req = alloc_req(blkif);
                 if (NULL == pending_req) {
                         blkif->st_oo_req++;
                         more_to_do = 1;
@@ -1020,7 +974,7 @@ __do_block_io_op(struct xen_blkif *blkif)
                                 goto done;
                         break;
                 case BLKIF_OP_DISCARD:
-                        free_req(pending_req);
+                        free_req(blkif, pending_req);
                         if (dispatch_discard_io(blkif, &req))
                                 goto done;
                         break;
@@ -1222,7 +1176,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
  fail_response:
         /* Haven't submitted any bio's yet. */
         make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
-        free_req(pending_req);
+        free_req(blkif, pending_req);
         msleep(1); /* back off a bit */
         return -EIO;
 
@@ -1279,51 +1233,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 static int __init xen_blkif_init(void)
 {
-        int i;
         int rc = 0;
 
         if (!xen_domain())
                 return -ENODEV;
 
-        blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
-        if (!blkbk) {
-                pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
-                return -ENOMEM;
-        }
-
-
-        blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) *
-                                        xen_blkif_reqs, GFP_KERNEL);
-
-        if (!blkbk->pending_reqs) {
-                rc = -ENOMEM;
-                goto out_of_memory;
-        }
-
         rc = xen_blkif_interface_init();
         if (rc)
                 goto failed_init;
 
-        INIT_LIST_HEAD(&blkbk->pending_free);
-        spin_lock_init(&blkbk->pending_free_lock);
-        init_waitqueue_head(&blkbk->pending_free_wq);
-
-        for (i = 0; i < xen_blkif_reqs; i++)
-                list_add_tail(&blkbk->pending_reqs[i].free_list,
-                              &blkbk->pending_free);
-
         rc = xen_blkif_xenbus_init();
         if (rc)
                 goto failed_init;
 
-        return 0;
-
- out_of_memory:
-        pr_alert(DRV_PFX "%s: out of memory\n", __func__);
 failed_init:
-        kfree(blkbk->pending_reqs);
-        kfree(blkbk);
-        blkbk = NULL;
         return rc;
 }
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f43782c..debff44 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -236,6 +236,14 @@ struct xen_blkif {
         int free_pages_num;
         struct list_head free_pages;
 
+        /* Allocation of pending_reqs */
+        struct pending_req *pending_reqs;
+        /* List of all 'pending_req' available */
+        struct list_head pending_free;
+        /* And its spinlock. */
+        spinlock_t pending_free_lock;
+        wait_queue_head_t pending_free_wq;
+
         /* statistics */
         unsigned long st_print;
         unsigned long long st_rd_req;
@@ -249,6 +257,25 @@ struct xen_blkif {
         wait_queue_head_t waiting_to_free;
 };
 
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+        struct xen_blkif *blkif;
+        u64 id;
+        int nr_pages;
+        atomic_t pendcnt;
+        unsigned short operation;
+        int status;
+        struct list_head free_list;
+        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+        grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
 
 #define vbd_sz(_v)      ((_v)->bdev->bd_part ?          \
                          (_v)->bdev->bd_part->nr_sects : \
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e0fd92a..bf7344f 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -30,6 +30,8 @@ struct backend_info {
         char *mode;
 };
 
+extern int xen_blkif_reqs;
+
 static struct kmem_cache *xen_blkif_cachep;
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
@@ -105,6 +107,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
         struct xen_blkif *blkif;
+        int i;
 
         blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
         if (!blkif)
@@ -124,6 +127,21 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
         blkif->free_pages_num = 0;
         atomic_set(&blkif->persistent_gnt_in_use, 0);
 
+        blkif->pending_reqs = kcalloc(xen_blkif_reqs,
+                                      sizeof(blkif->pending_reqs[0]),
+                                      GFP_KERNEL);
+        if (!blkif->pending_reqs) {
+                kmem_cache_free(xen_blkif_cachep, blkif);
+                return ERR_PTR(-ENOMEM);
+        }
+        INIT_LIST_HEAD(&blkif->pending_free);
+        spin_lock_init(&blkif->pending_free_lock);
+        init_waitqueue_head(&blkif->pending_free_wq);
+
+        for (i = 0; i < xen_blkif_reqs; i++)
+                list_add_tail(&blkif->pending_reqs[i].free_list,
+                              &blkif->pending_free);
+
         return blkif;
 }
 
@@ -205,6 +223,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 {
         if (!atomic_dec_and_test(&blkif->refcnt))
                 BUG();
+        kfree(blkif->pending_reqs);
         kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
-- 
1.7.7.5 (Apple Git-26)
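The heart of the patch is that alloc_req() and free_req() now operate on a
per-backend pool: one spinlock-protected free list plus a wait queue that
wakes the backend thread when a request comes back to an empty pool. For
readers who want the pattern in isolation, here is a minimal compilable
userspace analogue, with pthreads standing in for the kernel spinlock and
wait queue; all names are illustrative, not the driver's own:

#include <pthread.h>
#include <stddef.h>

#define POOL_SIZE 32 /* mirrors RING_SIZE in the patch */

struct pending_req {
        struct pending_req *next; /* stand-in for the kernel list_head */
};

struct backend {
        pthread_mutex_t lock;          /* role of pending_free_lock */
        pthread_cond_t nonempty;       /* role of pending_free_wq */
        struct pending_req *free_list; /* role of pending_free */
        struct pending_req reqs[POOL_SIZE];
};

static void backend_init(struct backend *be)
{
        int i;

        pthread_mutex_init(&be->lock, NULL);
        pthread_cond_init(&be->nonempty, NULL);
        be->free_list = NULL;
        /* Pre-populate the free list, like xen_blkif_alloc() does. */
        for (i = 0; i < POOL_SIZE; i++) {
                be->reqs[i].next = be->free_list;
                be->free_list = &be->reqs[i];
        }
}

/* Like alloc_req(): pop from this backend's list, NULL when exhausted;
 * the caller is expected to wait on 'nonempty' and retry. */
static struct pending_req *alloc_req(struct backend *be)
{
        struct pending_req *req;

        pthread_mutex_lock(&be->lock);
        req = be->free_list;
        if (req)
                be->free_list = req->next;
        pthread_mutex_unlock(&be->lock);
        return req;
}

/* Like free_req(): push back, and wake a waiter only when the list
 * transitioned from empty to non-empty. */
static void free_req(struct backend *be, struct pending_req *req)
{
        int was_empty;

        pthread_mutex_lock(&be->lock);
        was_empty = (be->free_list == NULL);
        req->next = be->free_list;
        be->free_list = req;
        pthread_mutex_unlock(&be->lock);
        if (was_empty)
                pthread_cond_signal(&be->nonempty);
}

Keeping one pool per backend means two guests no longer contend on a single
global lock, which is exactly the contention the commit message refers to.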
Konrad Rzeszutek Wilk
2013-Apr-09 16:13 UTC
Re: [PATCH v1 5/7] xen-blkback: make the queue of free requests per backend
On Wed, Mar 27, 2013 at 12:10:41PM +0100, Roger Pau Monne wrote:
> Remove the last dependency from blkbk by moving the list of free
> requests to blkif. This change reduces the contention on the list of
> available requests.
[...]
> + * This is the number of requests that will be pre-allocated for each backend.
> + * For better performance this is set to RING_SIZE (32), so requests
> + * in the ring will never have to wait for a free pending_req.

You might want to clarify that 'Any value less than that is sure to
cause problems.'

At which point perhaps we should have some logic to enforce that?

> -static int xen_blkif_reqs = 64;
> +
> +int xen_blkif_reqs = 32;
>  module_param_named(reqs, xen_blkif_reqs, int, 0);
> -MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
> +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate per backend");

Do we even need this anymore? I mean, if the optimal size is 32, going
below that is silly. Going above that is silly as well - as we won't be
using the extra count.

Can we just rip this module parameter out?

[...]
> +extern int xen_blkif_reqs;

How about just sticking it in 'common.h'?

[...]
> @@ -205,6 +223,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
>  {
>          if (!atomic_dec_and_test(&blkif->refcnt))
>                  BUG();

For sanity, should we have something like this:

        struct pending_req *p;
        int i = 0;

        list_for_each_entry(p, &blkif->pending_free, free_list)
                i++;

        BUG_ON(i != xen_blkif_reqs);

As that would mean somebody did not call 'free_req' and we have a loose
one laying around.

Or perhaps just leak the offending struct and then don't do 'kfree'?

> +        kfree(blkif->pending_reqs);
>          kmem_cache_free(xen_blkif_cachep, blkif);
>  }
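For reference, the enforcement hinted at above could have taken the shape of
an init-time clamp like the sketch below. This is hypothetical - as the next
message shows, v2 simply removed the parameter instead - and it reuses the
init function from the patch, assuming the RING_SIZE of 32 that the comment
mentions:

/* Hypothetical init-time clamp; never merged, shown only to illustrate
 * the suggestion. */
static int __init xen_blkif_init(void)
{
        int rc = 0;

        if (!xen_domain())
                return -ENODEV;

        /* Fewer pending_reqs than ring slots would just stall requests. */
        if (xen_blkif_reqs < 32) {
                pr_info(DRV_PFX "reqs %d too small, clamping to 32\n",
                        xen_blkif_reqs);
                xen_blkif_reqs = 32;
        }

        rc = xen_blkif_interface_init();
        if (rc)
                goto failed_init;

        rc = xen_blkif_xenbus_init();

failed_init:
        return rc;
}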
Roger Pau Monné
2013-Apr-15 13:50 UTC
Re: [PATCH v1 5/7] xen-blkback: make the queue of free requests per backend
On 09/04/13 18:13, Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 27, 2013 at 12:10:41PM +0100, Roger Pau Monne wrote:
>> Remove the last dependency from blkbk by moving the list of free
>> requests to blkif. This change reduces the contention on the list of
>> available requests.
[...]
>> + * This is the number of requests that will be pre-allocated for each backend.
>> + * For better performance this is set to RING_SIZE (32), so requests
>> + * in the ring will never have to wait for a free pending_req.
>
> You might want to clarify that 'Any value less than that is sure to
> cause problems.'
>
> At which point perhaps we should have some logic to enforce that?
>
>> -static int xen_blkif_reqs = 64;
>> +
>> +int xen_blkif_reqs = 32;
>>  module_param_named(reqs, xen_blkif_reqs, int, 0);
>> -MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
>> +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate per backend");
>
> Do we even need this anymore? I mean, if the optimal size is 32, going
> below that is silly. Going above that is silly as well - as we won't be
> using the extra count.
>
> Can we just rip this module parameter out?

Wiped out, it doesn't make sense.

[...]
>> +extern int xen_blkif_reqs;
>
> How about just sticking it in 'common.h'?

Done, I've replaced it with a define instead of a variable.

[...]
>> @@ -205,6 +223,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
>>  {
>>          if (!atomic_dec_and_test(&blkif->refcnt))
>>                  BUG();
>
> For sanity, should we have something like this:
>
>         struct pending_req *p;
>         int i = 0;
>
>         list_for_each_entry(p, &blkif->pending_free, free_list)
>                 i++;
>
>         BUG_ON(i != xen_blkif_reqs);
>
> As that would mean somebody did not call 'free_req' and we have a loose
> one laying around.
>
> Or perhaps just leak the offending struct and then don't do 'kfree'?

I prefer the BUG instead of leaking the struct; if we are indeed still
using a request, better to notice it and fix it :)

>> +        kfree(blkif->pending_reqs);
>>          kmem_cache_free(xen_blkif_cachep, blkif);
>>  }
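The define Roger mentions is not shown in this thread, but it would
presumably amount to something like this in common.h, replacing both the
extern declaration and the module parameter (name and placement assumed):

/* Hypothetical v2 shape: the per-backend pool size becomes a
 * compile-time constant visible to blkback.c and xenbus.c alike. */
#define XEN_BLKIF_REQS 32

As a compile-time constant it also answers the earlier enforcement question:
the allocation in xen_blkif_alloc() can then never be undersized relative to
the ring, so no runtime validation is needed.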
Konrad Rzeszutek Wilk
2013-Apr-17 14:16 UTC
Re: [PATCH v1 5/7] xen-blkback: make the queue of free requests per backend
On Mon, Apr 15, 2013 at 03:50:38PM +0200, Roger Pau Monné wrote:
> On 09/04/13 18:13, Konrad Rzeszutek Wilk wrote:
>> On Wed, Mar 27, 2013 at 12:10:41PM +0100, Roger Pau Monne wrote:
>>> Remove the last dependency from blkbk by moving the list of free
>>> requests to blkif. This change reduces the contention on the list of
>>> available requests.
[...]
>> Do we even need this anymore? I mean, if the optimal size is 32, going
>> below that is silly. Going above that is silly as well - as we won't be
>> using the extra count.
>>
>> Can we just rip this module parameter out?
>
> Wiped out, it doesn't make sense.
[...]
>> +extern int xen_blkif_reqs;
>>
>> How about just sticking it in 'common.h'?
>
> Done, I've replaced it with a define instead of a variable.
[...]
>> For sanity, should we have something like this:
>>
>>         struct pending_req *p;
>>         int i = 0;
>>
>>         list_for_each_entry(p, &blkif->pending_free, free_list)
>>                 i++;
>>
>>         BUG_ON(i != xen_blkif_reqs);
>>
>> As that would mean somebody did not call 'free_req' and we have a loose
>> one laying around.
>>
>> Or perhaps just leak the offending struct and then don't do 'kfree'?
>
> I prefer the BUG instead of leaking the struct; if we are indeed still
> using a request, better to notice it and fix it :)

OK. Let's leave it with a BUG.
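Combining the two points the thread settles on - a constant pool size and a
BUG on teardown if the free list is not full - the resulting xen_blkif_free()
would plausibly read as follows. This is a sketch assembled from the code
quoted above rather than the actual v2 patch, with XEN_BLKIF_REQS standing
in for the constant that replaced the module parameter:

static void xen_blkif_free(struct xen_blkif *blkif)
{
        struct pending_req *p;
        int i = 0;

        if (!atomic_dec_and_test(&blkif->refcnt))
                BUG();

        /* Every request must have come back through free_req() by now;
         * a short count means a pending_req leaked somewhere. */
        list_for_each_entry(p, &blkif->pending_free, free_list)
                i++;
        BUG_ON(i != XEN_BLKIF_REQS);

        kfree(blkif->pending_reqs);
        kmem_cache_free(xen_blkif_cachep, blkif);
}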