We already have fs_info->chunk_mutex to avoid concurrent chunk creation.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>

---
diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h	2010-04-18 08:12:22.086699485 +0800
+++ 3/fs/btrfs/ctree.h	2010-04-18 08:13:15.457699211 +0800
@@ -700,9 +700,7 @@ struct btrfs_space_info {
 	struct list_head list;
 
 	/* for controlling how we free up space for allocations */
-	wait_queue_head_t allocate_wait;
 	wait_queue_head_t flush_wait;
-	int allocating_chunk;
 	int flushing;
 
 	/* for block groups in our same type */
diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
--- 2/fs/btrfs/extent-tree.c	2010-04-18 08:12:22.092698714 +0800
+++ 3/fs/btrfs/extent-tree.c	2010-04-18 08:13:15.463699138 +0800
@@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
 			 struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 			    int dump_block_groups);
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_space_info *sinfo, u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
 		INIT_LIST_HEAD(&found->block_groups[i]);
 	init_rwsem(&found->groups_sem);
 	init_waitqueue_head(&found->flush_wait);
-	init_waitqueue_head(&found->allocate_wait);
 	spin_lock_init(&found->lock);
 	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
 				BTRFS_BLOCK_GROUP_SYSTEM |
@@ -3000,71 +3002,6 @@ flush:
 	wake_up(&info->flush_wait);
 }
 
-static int maybe_allocate_chunk(struct btrfs_root *root,
-				struct btrfs_space_info *info)
-{
-	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
-	struct btrfs_trans_handle *trans;
-	bool wait = false;
-	int ret = 0;
-	u64 min_metadata;
-	u64 free_space;
-
-	free_space = btrfs_super_total_bytes(disk_super);
-	/*
-	 * we allow the metadata to grow to a max of either 10gb or 5% of the
-	 * space in the volume.
-	 */
-	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
-			   div64_u64(free_space * 5, 100));
-	if (info->total_bytes >= min_metadata) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
-
-	if (info->full) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
-
-	if (!info->allocating_chunk) {
-		info->force_alloc = 1;
-		info->allocating_chunk = 1;
-	} else {
-		wait = true;
-	}
-
-	spin_unlock(&info->lock);
-
-	if (wait) {
-		wait_event(info->allocate_wait,
-			   !info->allocating_chunk);
-		return 1;
-	}
-
-	trans = btrfs_start_transaction(root, 1);
-	if (!trans) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     4096 + 2 * 1024 * 1024,
-			     info->flags, 0);
-	btrfs_end_transaction(trans, root);
-	if (ret)
-		goto out;
-out:
-	spin_lock(&info->lock);
-	info->allocating_chunk = 0;
-	spin_unlock(&info->lock);
-	wake_up(&info->allocate_wait);
-
-	if (ret)
-		return 0;
-	return 1;
-}
-
 /*
  * Reserve metadata space for delalloc.
  */
@@ -3105,7 +3042,8 @@ again:
 	flushed++;
 
 	if (flushed == 1) {
-		if (maybe_allocate_chunk(root, meta_sinfo))
+		if (maybe_allocate_chunk(NULL, root, meta_sinfo,
+					 num_bytes))
 			goto again;
 		flushed++;
 	} else {
@@ -3220,7 +3158,8 @@ again:
 	if (used > meta_sinfo->total_bytes) {
 		retries++;
 		if (retries == 1) {
-			if (maybe_allocate_chunk(root, meta_sinfo))
+			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
+						 num_bytes))
 				goto again;
 			retries++;
 		} else {
@@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
 	rcu_read_unlock();
 }
 
+static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+			      u64 alloc_bytes)
+{
+	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+
+	if (sinfo->bytes_used + sinfo->bytes_reserved +
+	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+		return 0;
+
+	if (sinfo->bytes_used + sinfo->bytes_reserved +
+	    alloc_bytes < div_factor(num_bytes, 8))
+		return 0;
+
+	return 1;
+}
+
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force)
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
-	u64 thresh;
 	int ret = 0;
 
 	mutex_lock(&fs_info->chunk_mutex);
@@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
 		goto out;
 	}
 
-	thresh = space_info->total_bytes - space_info->bytes_readonly;
-	thresh = div_factor(thresh, 8);
-	if (!force &&
-	    (space_info->bytes_used + space_info->bytes_pinned +
-	     space_info->bytes_reserved + alloc_bytes) < thresh) {
+	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
@@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
 	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
+	else
+		ret = 1;
 	space_info->force_alloc = 0;
 	spin_unlock(&space_info->lock);
 out:
@@ -3479,6 +3431,38 @@ out:
 	return ret;
 }
 
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_space_info *sinfo, u64 num_bytes)
+{
+	int ret;
+	int end_trans = 0;
+
+	if (sinfo->full)
+		return 0;
+
+	spin_lock(&sinfo->lock);
+	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
+	spin_unlock(&sinfo->lock);
+	if (!ret)
+		return 0;
+
+	if (!trans) {
+		trans = btrfs_join_transaction(root, 1);
+		BUG_ON(IS_ERR(trans));
+		end_trans = 1;
+	}
+
+	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+			     num_bytes + 2 * 1024 * 1024,
+			     get_alloc_profile(root, sinfo->flags), 0);
+
+	if (end_trans)
+		btrfs_end_transaction(trans, root);
+
+	return ret == 1 ? 1 : 0;
+}
+
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
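To get a feel for the new heuristic, here is a minimal userspace sketch of
the should_alloc_chunk() logic: a chunk is allocated only when less than
256MB would remain free after the request AND the space info is at least
80% committed.  The div_factor() helper below assumes the kernel's
num * factor / 10 semantics, and the sample numbers are made up for
illustration; this is a sketch, not the kernel code itself.

#include <stdio.h>
#include <stdint.h>

/* assumed to mirror the kernel helper: num * factor / 10 */
static uint64_t div_factor(uint64_t num, int factor)
{
	return num * factor / 10;
}

/* returns 1 when a new chunk should be allocated */
static int should_alloc_chunk(uint64_t total_bytes, uint64_t bytes_readonly,
			      uint64_t bytes_used, uint64_t bytes_reserved,
			      uint64_t alloc_bytes)
{
	uint64_t num_bytes = total_bytes - bytes_readonly;

	/* more than 256MB would still be free after this allocation */
	if (bytes_used + bytes_reserved + alloc_bytes +
	    256 * 1024 * 1024 < num_bytes)
		return 0;

	/* the space info is less than 80% committed */
	if (bytes_used + bytes_reserved + alloc_bytes <
	    div_factor(num_bytes, 8))
		return 0;

	return 1;
}

int main(void)
{
	const uint64_t MB = 1024 * 1024;

	/* 1GB total, 100MB used: plenty of headroom -> prints 0 */
	printf("%d\n", should_alloc_chunk(1024 * MB, 0, 100 * MB, 0, 4 * MB));
	/* 1GB total, 900MB used: tight and over 80% committed -> prints 1 */
	printf("%d\n", should_alloc_chunk(1024 * MB, 0, 900 * MB, 0, 4 * MB));
	return 0;
}

Both checks have to fail for the allocation to go ahead, which is why the
first call above declines and the second allocates.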
Josef Bacik
2010-Apr-19 13:57 UTC
Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
On Mon, Apr 19, 2010 at 06:45:44PM +0800, Yan, Zheng wrote:
> We already have fs_info->chunk_mutex to avoid concurrent
> chunk creation.
>
> Signed-off-by: Yan Zheng <zheng.yan@oracle.com>

[patch quote trimmed to the hunk under discussion]

> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes)
> +{
> +	int ret;
> +	int end_trans = 0;
> +
> +	if (sinfo->full)
> +		return 0;
> +

maybe_allocate_chunk is called with the info->lock already held, this will
deadlock.

> +	spin_lock(&sinfo->lock);
> +	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
> +	spin_unlock(&sinfo->lock);
> +	if (!ret)
> +		return 0;
> +
> +	if (!trans) {
> +		trans = btrfs_join_transaction(root, 1);
> +		BUG_ON(IS_ERR(trans));
> +		end_trans = 1;
> +	}
> +
> +	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> +			     num_bytes + 2 * 1024 * 1024,
> +			     get_alloc_profile(root, sinfo->flags), 0);
> +
> +	if (end_trans)
> +		btrfs_end_transaction(trans, root);
> +
> +	return ret == 1 ? 1 : 0;
> +}

The purpose of maybe_allocate_chunk was that there is no way to know if some
other CPU is currently trying to allocate a chunk for the given space info.
We could have two CPUs come into do_chunk_alloc at relatively the same time
and end up allocating twice the amount of space, which is why I did the
waitqueue thing.  It seems like this is still a possibility with your patch.

Thanks,

Josef
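Josef's deadlock point is easy to reproduce in userspace: a non-recursive
lock acquired again by code running under that same lock never makes
progress.  A minimal sketch with a pthread mutex standing in for the kernel
spinlock follows; the names and structure are illustrative, not btrfs code.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t lock;

/* plays the role of the patch's maybe_allocate_chunk() */
static void maybe_allocate_chunk(void)
{
	/* corresponds to spin_lock(&sinfo->lock) in the new code */
	int err = pthread_mutex_lock(&lock);
	if (err)
		printf("second acquire failed: %s\n", strerror(err));
	else
		pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	/* ERRORCHECK makes the self-deadlock report EDEADLK instead of hanging */
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&lock, &attr);

	pthread_mutex_lock(&lock);	/* caller already holds info->lock... */
	maybe_allocate_chunk();		/* ...and the callee takes it again */
	pthread_mutex_unlock(&lock);
	return 0;
}

With the default mutex type the second acquire would simply hang, which is
the userspace analogue of what a kernel spinlock does here: the CPU spins
forever waiting for a lock it holds itself.

Thanks,

Josef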
Yan, Zheng
2010-Apr-19 14:46 UTC
Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote:
> The purpose of maybe_allocate_chunk was that there is no way to know if some
> other CPU is currently trying to allocate a chunk for the given space info.
> We could have two CPUs come into do_chunk_alloc at relatively the same time
> and end up allocating twice the amount of space, which is why I did the
> waitqueue thing.  It seems like this is still a possibility with your patch.

This is impossible because the very first thing do_chunk_alloc does is
lock the chunk_mutex.

Yan, Zheng
Josef Bacik
2010-Apr-19 14:48 UTC
Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
On Mon, Apr 19, 2010 at 10:46:12PM +0800, Yan, Zheng wrote:
> On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote:
> > The purpose of maybe_allocate_chunk was that there is no way to know if
> > some other CPU is currently trying to allocate a chunk for the given
> > space info.  We could have two CPUs come into do_chunk_alloc at
> > relatively the same time and end up allocating twice the amount of
> > space, which is why I did the waitqueue thing.  It seems like this is
> > still a possibility with your patch.
>
> This is impossible because the very first thing do_chunk_alloc does is
> lock the chunk_mutex.

Sure, that just means we don't get two things creating chunks at the same
time; it doesn't stop them from creating chunks one right after another.
So CPUs 0 and 1 come into the free-space check, both realize they need to
allocate a chunk, and race to call do_chunk_alloc.  One of them wins, and
the other blocks on the chunk_mutex lock.  When the first finishes, the
other one is able to continue and do what it was originally going to do,
and then you get two chunks when you really only wanted one.

Thanks,

Josef
Yan, Zheng
2010-Apr-19 15:34 UTC
Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
On Mon, Apr 19, 2010 at 10:48 PM, Josef Bacik <josef@redhat.com> wrote:
> Sure, that just means we don't get two things creating chunks at the same
> time; it doesn't stop them from creating chunks one right after another.
> So CPUs 0 and 1 come into the free-space check, both realize they need to
> allocate a chunk, and race to call do_chunk_alloc.  One of them wins, and
> the other blocks on the chunk_mutex lock.  When the first finishes, the
> other one is able to continue and do what it was originally going to do,
> and then you get two chunks when you really only wanted one.

There is a check in do_chunk_alloc, so the later caller will do nothing if
the first call added enough space.

Yan Zheng
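The pattern Yan describes, re-checking the threshold after taking
chunk_mutex so the loser of the race backs off, can be shown with a small
userspace sketch.  The names and numbers are simplified stand-ins for the
kernel structures, not btrfs code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;

/* made-up numbers, in MB: 900MB of space, 800MB already committed */
static unsigned long long total_bytes = 900;
static unsigned long long used_bytes = 800;

/* stand-in for should_alloc_chunk(): under 256MB of headroom left */
static int should_alloc_chunk(void)
{
	return used_bytes + 256 > total_bytes;
}

static void *do_chunk_alloc(void *arg)
{
	long id = (long)arg;

	pthread_mutex_lock(&chunk_mutex);
	/*
	 * The threshold is re-evaluated under chunk_mutex: if the winner
	 * of the race already added space, the loser sees the new total
	 * and backs off instead of allocating a second chunk.
	 */
	if (should_alloc_chunk()) {
		total_bytes += 1024;	/* "allocate" a 1GB chunk */
		printf("thread %ld allocated a chunk\n", id);
	} else {
		printf("thread %ld: enough space now, doing nothing\n", id);
	}
	pthread_mutex_unlock(&chunk_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	/* both threads decide "we need a chunk" at the same time */
	pthread_create(&t1, NULL, do_chunk_alloc, (void *)1L);
	pthread_create(&t2, NULL, do_chunk_alloc, (void *)2L);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

Whichever thread wins the mutex allocates; the other re-evaluates the
threshold against the new total and skips, so the race Josef describes
costs at most one redundant check, not a redundant chunk.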