Alexandre Oliva
2011-Oct-14 15:10 UTC
[PATCH 13/20] Btrfs: revamp clustered allocation logic
Parameterize clusters on minimum total size, minimum chunk size and
minimum contiguous size for at least one chunk, without limits on
cluster, window or gap sizes. Don't tolerate any fragmentation for
SSD_SPREAD; accept it for metadata, but try to keep data dense.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/free-space-cache.c | 112 +++++++++++++++++++------------------
 1 files changed, 49 insertions(+), 63 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index dd7fe43..3aa56e4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2284,23 +2284,23 @@ out: static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, struct btrfs_free_space *entry, struct btrfs_free_cluster *cluster, - u64 offset, u64 bytes, u64 min_bytes) + u64 offset, u64 bytes, + u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; unsigned long next_zero; unsigned long i; - unsigned long search_bits; - unsigned long total_bits; + unsigned long want_bits; + unsigned long min_bits; unsigned long found_bits; unsigned long start = 0; unsigned long total_found = 0; int ret; - bool found = false; i = offset_to_bit(entry->offset, block_group->sectorsize, max_t(u64, offset, entry->offset)); - search_bits = bytes_to_bits(bytes, block_group->sectorsize); - total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); + want_bits = bytes_to_bits(bytes, block_group->sectorsize); + min_bits = bytes_to_bits(min_bytes, block_group->sectorsize); again: found_bits = 0;
@@ -2309,7 +2309,7 @@ again: i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { next_zero = find_next_zero_bit(entry->bitmap, BITS_PER_BITMAP, i); - if (next_zero - i >= search_bits) { + if (next_zero - i >= min_bits) { found_bits = next_zero - i; break; }
@@ -2319,10 +2319,9 @@ if (!found_bits) return -ENOSPC; - if (!found) { + if (!total_found) { start = i; cluster->max_size = 0; - found = true; } total_found += found_bits;
@@ -2330,13 +2329,8 @@ if (cluster->max_size < found_bits * block_group->sectorsize) cluster->max_size = found_bits * block_group->sectorsize; - if (total_found < total_bits) { - i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); - if (i - start > total_bits * 2) { - total_found = 0; - cluster->max_size = 0; - found = false; - } + if (total_found < want_bits || cluster->max_size < cont1_bytes) { + i = next_zero + 1; goto again; }
@@ -2352,23 +2346,23 @@ again: /* * This searches the block group for just extents to fill the cluster with. + * Try to find a cluster with at least bytes total bytes, at least one + * extent of cont1_bytes, and other clusters of at least min_bytes. */ static noinline int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, struct list_head *bitmaps, u64 offset, u64 bytes, - u64 min_bytes) + u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *first = NULL; struct btrfs_free_space *entry = NULL; - struct btrfs_free_space *prev = NULL; struct btrfs_free_space *last; struct rb_node *node; u64 window_start; u64 window_free; u64 max_extent; - u64 max_gap = 128 * 1024; entry = tree_search_offset(ctl, offset, 0, 1); if (!entry)
@@ -2378,8 +2372,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, * We don't want bitmaps, so just move along until we find a normal * extent entry. */ - while (entry->bitmap) { - if (list_empty(&entry->list)) + while (entry->bitmap || entry->bytes < min_bytes) { + if (entry->bitmap && list_empty(&entry->list)) list_add_tail(&entry->list, bitmaps); else if (entry->bitmap) printk(KERN_ERR "btrfs: not using (busy?!?) bitmap %lli\n",
@@ -2395,12 +2389,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, max_extent = entry->bytes; first = entry; last = entry; - prev = entry; - while (window_free <= min_bytes) { - node = rb_next(&entry->offset_index); - if (!node) - return -ENOSPC; + for (node = rb_next(&entry->offset_index); node; + node = rb_next(&entry->offset_index)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); if (entry->bitmap) {
@@ -2412,26 +2403,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, continue; } - /* - * we haven't filled the empty size and the window is - * very large. reset and try again - */ - if (entry->offset - (prev->offset + prev->bytes) > max_gap || - entry->offset - window_start > (min_bytes * 2)) { - first = entry; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; + if (entry->bytes < min_bytes) + continue; + + last = entry; + window_free += entry->bytes; + if (entry->bytes > max_extent) max_extent = entry->bytes; - } else { - last = entry; - window_free += entry->bytes; - if (entry->bytes > max_extent) - max_extent = entry->bytes; - } - prev = entry; } + if (window_free < bytes || max_extent < cont1_bytes) + return -ENOSPC; + cluster->window_start = first->offset; node = &first->offset_index;
@@ -2445,7 +2428,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, entry = rb_entry(node, struct btrfs_free_space, offset_index); node = rb_next(&entry->offset_index); - if (entry->bitmap) + if (entry->bitmap || entry->bytes < min_bytes) continue; rb_erase(&entry->offset_index, &ctl->free_space_offset);
@@ -2467,7 +2450,7 @@ static noinline int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, struct list_head *bitmaps, u64 offset, u64 bytes, - u64 min_bytes) + u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry;
@@ -2492,7 +2475,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, if (entry->bytes < min_bytes) continue; ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, - bytes, min_bytes); + bytes, cont1_bytes, min_bytes); if (!ret) return 0; }
@@ -2506,7 +2489,7 @@ /* * here we try to find a cluster of blocks in a block group. The goal - * is to find at least bytes free and up to empty_size + bytes free. + * is to find at least bytes+empty_size. * We might not find them all in one contiguous area. * * returns zero and sets up cluster if things worked out, otherwise
@@ -2522,23 +2505,24 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, struct btrfs_free_space *entry, *tmp; LIST_HEAD(bitmaps); u64 min_bytes; + u64 cont1_bytes; int ret; - /* for metadata, allow allocates with more holes */ + /* + * Choose the minimum extent size we'll require for this + * cluster. For SSD_SPREAD, don't allow any fragmentation. + * For metadata, allow allocates with smaller extents. For + * data, keep it dense. + */ if (btrfs_test_opt(root, SSD_SPREAD)) { - min_bytes = bytes + empty_size; + cont1_bytes = min_bytes = bytes + empty_size; } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { - /* - * we want to do larger allocations when we are - * flushing out the delayed refs, it helps prevent - * making more work as we go along. - */ - if (trans->transaction->delayed_refs.flushing) - min_bytes = max(bytes, (bytes + empty_size) >> 1); - else - min_bytes = max(bytes, (bytes + empty_size) >> 4); - } else - min_bytes = max(bytes, (bytes + empty_size) >> 2); + cont1_bytes = bytes; + min_bytes = block_group->sectorsize; + } else { + cont1_bytes = max(bytes, (bytes + empty_size) >> 2); + min_bytes = block_group->sectorsize; + } spin_lock(&ctl->tree_lock);
@@ -2546,7 +2530,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, * If we know we don't have enough space to make a cluster don't even * bother doing all the work to try and find one. */ - if (ctl->free_space < min_bytes) { + if (ctl->free_space < bytes) { spin_unlock(&ctl->tree_lock); return -ENOSPC; }
@@ -2560,10 +2544,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, } ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, - bytes, min_bytes); + bytes + empty_size, + cont1_bytes, min_bytes); if (ret) ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, - offset, bytes, min_bytes); + offset, bytes + empty_size, + cont1_bytes, min_bytes); /* Clear our temporary list */ list_for_each_entry_safe(entry, tmp, &bitmaps, list)
-- 
1.7.4.4
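The contract of the three parameters is easiest to see outside the
kernel. Below is a minimal userspace sketch of the window accumulation
that setup_cluster_no_bitmap performs after this patch; it is
illustrative only (the struct and function names are made up, not
btrfs code): extents smaller than min_bytes are ignored, and the
cluster succeeds only if the window totals at least `bytes` and one
extent reaches cont1_bytes.

/* Illustrative userspace sketch of the new cluster-setup contract. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct free_extent { uint64_t offset, bytes; };

static int setup_cluster(const struct free_extent *e, size_t n,
                         uint64_t bytes, uint64_t cont1_bytes,
                         uint64_t min_bytes, uint64_t *window_start)
{
        uint64_t window_free = 0, max_extent = 0;
        int first = -1;

        for (size_t i = 0; i < n; i++) {
                if (e[i].bytes < min_bytes)
                        continue;       /* too small to count at all */
                if (first < 0)
                        first = (int)i; /* window starts at first usable extent */
                window_free += e[i].bytes;
                if (e[i].bytes > max_extent)
                        max_extent = e[i].bytes;
        }
        if (first < 0 || window_free < bytes || max_extent < cont1_bytes)
                return -1;              /* -ENOSPC in the kernel */
        *window_start = e[first].offset;
        return 0;
}

int main(void)
{
        struct free_extent e[] = { {0, 4096}, {16384, 65536}, {131072, 32768} };
        uint64_t start;

        /* want 64K total, one 64K contiguous run, counting extents >= 4K */
        if (!setup_cluster(e, 3, 65536, 65536, 4096, &start))
                printf("cluster window starts at %llu\n",
                       (unsigned long long)start);
        return 0;
}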
Alexandre Oliva
2011-Oct-29 04:20 UTC
[PATCH 17/20] Btrfs: introduce -o cluster and -o nocluster
Introduce -o nocluster to disable the use of clusters for extent
allocation, and -o cluster to re-enable it.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/ctree.h       |    3 ++-
 fs/btrfs/extent-tree.c |    2 +-
 fs/btrfs/super.c       |   16 ++++++++++++++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 04a5dfc..1deaf2d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -971,7 +971,7 @@ struct btrfs_fs_info { * is required instead of the faster short fsync log commits */ u64 last_trans_log_full_commit; - unsigned long mount_opt:20; + unsigned long mount_opt:28; unsigned long compress_type:4; u64 max_inline; u64 alloc_start;
@@ -1413,6 +1413,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) #define BTRFS_MOUNT_RECOVERY (1 << 18) +#define BTRFS_MOUNT_NO_ALLOC_CLUSTER (1 << 19) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7064979..7ddbf9b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5186,7 +5186,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, bool found_uncached_bg = false; bool failed_cluster_refill = false; bool failed_alloc = false; - bool use_cluster = true; + bool use_cluster = !btrfs_test_opt(root, NO_ALLOC_CLUSTER); bool have_caching_bg = false; u64 ideal_cache_percent = 0; u64 ideal_cache_offset = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8bd9d6d..26b13d7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -164,7 +164,8 @@ enum { Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, - Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, + Opt_inode_cache, Opt_no_space_cache, Opt_recovery, + Opt_nocluster, Opt_cluster, Opt_err, };
@@ -199,6 +200,8 @@ static match_table_t tokens = { {Opt_inode_cache, "inode_cache"}, {Opt_no_space_cache, "nospace_cache"}, {Opt_recovery, "recovery"}, + {Opt_nocluster, "nocluster"}, + {Opt_cluster, "cluster"}, {Opt_err, NULL}, };
@@ -390,12 +393,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); break; case Opt_defrag: - printk(KERN_INFO "btrfs: enabling auto defrag"); + printk(KERN_INFO "btrfs: enabling auto defrag\n"); btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); break; case Opt_recovery: printk(KERN_INFO "btrfs: enabling auto recovery"); btrfs_set_opt(info->mount_opt, RECOVERY); + case Opt_nocluster: + printk(KERN_INFO "btrfs: disabling alloc clustering\n"); + btrfs_set_opt(info->mount_opt, NO_ALLOC_CLUSTER); + break; + case Opt_cluster: + printk(KERN_INFO "btrfs: enabling alloc clustering\n"); + btrfs_clear_opt(info->mount_opt, NO_ALLOC_CLUSTER); break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option "
@@ -722,6 +732,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",autodefrag"); if (btrfs_test_opt(root, INODE_MAP_CACHE)) seq_puts(seq, ",inode_cache"); + if (btrfs_test_opt(root, NO_ALLOC_CLUSTER)) + seq_puts(seq, ",nocluster"); return 0; }
-- 
1.7.4.4
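For reference, the set/clear/test pattern behind these mount options
is plain token pasting over a bit mask. A stand-alone sketch follows;
note that the kernel's real btrfs_test_opt takes a root rather than
the mask itself, so this is a simplified rendition, not the in-tree
macro set.

#include <stdio.h>

#define BTRFS_MOUNT_NO_ALLOC_CLUSTER (1 << 19)

#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt)   ((o) |= BTRFS_MOUNT_##opt)
#define btrfs_test_opt(o, opt)  ((o) & BTRFS_MOUNT_##opt)

int main(void)
{
        unsigned long mount_opt = 0;

        btrfs_set_opt(mount_opt, NO_ALLOC_CLUSTER);   /* -o nocluster */
        printf("nocluster: %d\n", !!btrfs_test_opt(mount_opt, NO_ALLOC_CLUSTER));

        btrfs_clear_opt(mount_opt, NO_ALLOC_CLUSTER); /* -o cluster */
        printf("nocluster: %d\n", !!btrfs_test_opt(mount_opt, NO_ALLOC_CLUSTER));
        return 0;
}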
Alexandre Oliva
2011-Nov-08 14:25 UTC
[PATCH 14/20] Btrfs: introduce option to rebalance only metadata
Experimental patch to compact only the metadata after excessive block
groups have been created. It should probably be implemented as a
balance option rather than a separate ioctl, but this was good enough
for me to try it.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/ioctl.c   |    2 ++
 fs/btrfs/ioctl.h   |    3 +++
 fs/btrfs/volumes.c |   33 ++++++++++++++++++++++++++++-----
 fs/btrfs/volumes.h |    1 +
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a90e749..6f53983 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3077,6 +3077,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_dev_info(root, argp); case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); + case BTRFS_IOC_BALANCE_METADATA: + return btrfs_balance_metadata(root->fs_info->dev_root); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg, 0, 0, 0); case BTRFS_IOC_CLONE_RANGE:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 252ae99..46bc428 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -277,4 +277,7 @@ struct btrfs_ioctl_logical_ino_args { #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_BALANCE_METADATA _IOW(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_vol_args) + #endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7b348c2..db4397d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2084,7 +2084,7 @@ static u64 div_factor(u64 num, int factor) return num; } -int btrfs_balance(struct btrfs_root *dev_root) +static int btrfs_balance_skip(struct btrfs_root *dev_root, u64 skip_type) { int ret; struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
@@ -2096,6 +2096,9 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; struct btrfs_trans_handle *trans; struct btrfs_key found_key; + struct btrfs_chunk *chunk; + u64 chunk_type; + bool skip; if (dev_root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS;
@@ -2165,11 +2168,21 @@ int btrfs_balance(struct btrfs_root *dev_root) if (found_key.offset == 0) break; + if (skip_type) { + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + chunk_type = btrfs_chunk_type(path->nodes[0], chunk); + skip = (chunk_type & skip_type); + } else + skip = false; + btrfs_release_path(path); - ret = btrfs_relocate_chunk(chunk_root, - chunk_root->root_key.objectid, - found_key.objectid, - found_key.offset); + + ret = (skip ? 0 : + btrfs_relocate_chunk(chunk_root, + chunk_root->root_key.objectid, + found_key.objectid, + found_key.offset)); if (ret && ret != -ENOSPC) goto error; key.offset = found_key.offset - 1;
@@ -2181,6 +2194,16 @@ error: return ret; } +int btrfs_balance(struct btrfs_root *dev_root) +{ + return btrfs_balance_skip(dev_root, 0); +} + +int btrfs_balance_metadata(struct btrfs_root *dev_root) +{ + return btrfs_balance_skip(dev_root, BTRFS_BLOCK_GROUP_DATA); +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks.
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 78f2d4d..6844010 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -229,6 +229,7 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_root *dev_root); +int btrfs_balance_metadata(struct btrfs_root *dev_root); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes,
-- 
1.7.4.4
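A userspace caller could exercise the new ioctl roughly as follows.
This is a hypothetical sketch: BTRFS_IOC_BALANCE_METADATA exists only
with this patch applied, and like the old-style BTRFS_IOC_BALANCE it
is invoked here with a NULL argument; the struct and constants are
copied from the patch and from btrfs-progs of that era.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_PATH_NAME_MAX 4087
struct btrfs_ioctl_vol_args {
        __s64 fd;
        char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_IOC_BALANCE_METADATA _IOW(BTRFS_IOCTL_MAGIC, 37, \
                                        struct btrfs_ioctl_vol_args)

int main(int argc, char **argv)
{
        /* open the mount point of the filesystem to compact */
        int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

        if (fd < 0 || ioctl(fd, BTRFS_IOC_BALANCE_METADATA, NULL) < 0)
                perror("balance metadata");
        if (fd >= 0)
                close(fd);
        return 0;
}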
Alexandre Oliva
2011-Nov-08 14:33 UTC
[PATCH 01/20] Btrfs: enable removal of second disk with raid1 metadata
Enable removal of a second disk even if that requires conversion of
metadata from raid1 to dup, but not when data would lose replication.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/volumes.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c37433d..7b348c2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1290,12 +1290,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto out; } - if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && + if ((root->fs_info->avail_data_alloc_bits & BTRFS_BLOCK_GROUP_RAID1) && root->fs_info->fs_devices->num_devices <= 2) { printk(KERN_ERR "btrfs: unable to go below two " "devices on raid1\n"); ret = -EINVAL; goto out; + } else if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && + root->fs_info->fs_devices->num_devices <= 2) { + printk(KERN_ERR "btrfs: going below two devices " + "will switch metadata from raid1 to dup\n"); } if (strcmp(device_path, "missing") == 0) {
-- 
1.7.4.4
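The decision table is small enough to sketch in isolation. This is
illustrative userspace code, not the kernel function: only the raid1
flag value is real, and may_remove_device is a made-up name standing
in for the check inside btrfs_rm_device.

#include <stdio.h>

#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)

static int may_remove_device(unsigned long long data_bits,
                             unsigned long long all_bits,
                             int num_devices)
{
        if ((data_bits & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
                fprintf(stderr, "unable to go below two devices on raid1\n");
                return 0;       /* -EINVAL in the kernel */
        }
        if ((all_bits & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2)
                fprintf(stderr, "going below two devices will switch "
                        "metadata from raid1 to dup\n");
        return 1;
}

int main(void)
{
        /* metadata raid1, data not raid1, two devices: allowed, with warning */
        printf("allowed: %d\n",
               may_remove_device(0, BTRFS_BLOCK_GROUP_RAID1, 2));
        return 0;
}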
Alexandre Oliva
[PATCH 18/20] Btrfs: add -o mincluster option
With -o mincluster, we save the location of the last successful
allocation, so as to emulate some of the cluster allocation logic
(though not the non-bitmap preference) without actually going through
the exercise of allocating clusters.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c      |   16 +++++++++++++---
 fs/btrfs/free-space-cache.c |    1 +
 fs/btrfs/super.c            |   17 +++++++++++++----
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7ddbf9b..3c649fe 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5172,7 +5172,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; - struct btrfs_free_cluster *last_ptr = NULL; + struct btrfs_free_cluster *last_ptr = NULL, *save_ptr = NULL; struct btrfs_block_group_cache *block_group = NULL; struct btrfs_block_group_cache *used_block_group; int empty_cluster = 2 * 1024 * 1024;
@@ -5219,8 +5219,16 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, debug = 1; debugid = atomic_inc_return(&debugcnt); last_ptr = &root->fs_info->meta_alloc_cluster; - if (!btrfs_test_opt(root, SSD)) - empty_cluster = 64 * 1024; + if (!btrfs_test_opt(root, SSD)) { + /* !SSD && SSD_SPREAD == -o mincluster. */ + if (btrfs_test_opt(root, SSD_SPREAD)) { + save_ptr = last_ptr; + hint_byte = save_ptr->window_start; + last_ptr = NULL; + use_cluster = false; + } else + empty_cluster = 64 * 1024; + } } if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
@@ -5556,6 +5564,8 @@ checks: btrfs_add_free_space(used_block_group, offset, search_start - offset); BUG_ON(offset > search_start); + if (save_ptr) + save_ptr->window_start = search_start + num_bytes; if (used_block_group != block_group) btrfs_put_block_group(used_block_group); btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3aa56e4..953f7dd 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2579,6 +2579,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) cluster->max_size = 0; INIT_LIST_HEAD(&cluster->block_group_list); cluster->block_group = NULL; + cluster->window_start = 0; } int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 26b13d7..32fe064 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -165,7 +165,7 @@ enum { Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, - Opt_nocluster, Opt_cluster, Opt_err, + Opt_nocluster, Opt_cluster, Opt_mincluster, Opt_err, };
@@ -202,6 +202,7 @@ static match_table_t tokens = { {Opt_recovery, "recovery"}, {Opt_nocluster, "nocluster"}, {Opt_cluster, "cluster"}, + {Opt_mincluster, "mincluster"}, {Opt_err, NULL}, };
@@ -407,6 +408,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: enabling alloc clustering\n"); btrfs_clear_opt(info->mount_opt, NO_ALLOC_CLUSTER); break; + case Opt_mincluster: + printk(KERN_INFO "btrfs: enabling minimal alloc clustering\n"); + btrfs_clear_opt(info->mount_opt, NO_ALLOC_CLUSTER); + btrfs_set_opt(info->mount_opt, SSD_SPREAD); + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p);
@@ -706,9 +712,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) } if (btrfs_test_opt(root, NOSSD)) seq_puts(seq, ",nossd"); - if (btrfs_test_opt(root, SSD_SPREAD)) - seq_puts(seq, ",ssd_spread"); - else if (btrfs_test_opt(root, SSD)) + if (btrfs_test_opt(root, SSD_SPREAD)) { + if (btrfs_test_opt(root, SSD)) + seq_puts(seq, ",ssd_spread"); + else + seq_puts(seq, ",mincluster"); + } else if (btrfs_test_opt(root, SSD)) seq_puts(seq, ",ssd"); if (btrfs_test_opt(root, NOTREELOG)) seq_puts(seq, ",notreelog");
-- 
1.7.4.4
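Stripped of the btrfs machinery, the idea is just a persistent
allocation hint. A toy userspace sketch under that assumption (all
names here are made up; a real allocator would search free space from
the hint rather than allocate at it):

#include <stdio.h>
#include <stdint.h>

static uint64_t window_start;   /* persists across allocations */

static uint64_t alloc_from(uint64_t hint, uint64_t num_bytes)
{
        /* pretend the search starting at `hint` succeeds immediately */
        uint64_t found = hint;

        window_start = found + num_bytes;       /* save for next time */
        return found;
}

int main(void)
{
        printf("first:  %llu\n",
               (unsigned long long)alloc_from(window_start, 4096));
        printf("second: %llu\n",
               (unsigned long long)alloc_from(window_start, 4096));
        return 0;
}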
Alexandre Oliva
2011-Nov-16 03:25 UTC
[PATCH 10/20] Btrfs: report reason for failed relocation
btrfs filesystem balance sometimes fails on corrupted filesystems, but
without any information explaining what the failure was, which would
help track down the problem. This patch adds logging for nearly all
error conditions that may cause relocation to fail.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/relocation.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5..15a2270 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2496,6 +2496,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, if (!upper->eb) { ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) { + printk(KERN_INFO "btrfs: searching slot %llu failed: %i\n", key->objectid, -ret); err = ret; break; }
@@ -2543,6 +2544,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, btrfs_tree_unlock(eb); free_extent_buffer(eb); if (ret < 0) { + printk(KERN_INFO "btrfs: cow slot failed: %i\n", -ret); err = ret; goto next; }
@@ -2730,6 +2732,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, BUG_ON(node->processed); root = select_one_root(trans, node); if (root == ERR_PTR(-ENOENT)) { + printk(KERN_INFO "btrfs: could not find a root to update\n"); update_processed_blocks(rc, node); goto out; }
@@ -2756,6 +2759,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret > 0) ret = 0; + if (ret < 0) + printk(KERN_INFO "btrfs: failed to search slot %llu: %i\n", key->objectid, -ret); } if (!ret) update_processed_blocks(rc, node);
@@ -2813,12 +2818,14 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, block->level, block->bytenr); if (IS_ERR(node)) { err = PTR_ERR(node); + printk(KERN_INFO "btrfs: failed to build backref tree for key %llu byte %llu: %i\n", block->key.objectid, block->bytenr, -err); goto out; } ret = relocate_tree_block(trans, rc, node, &block->key, path); if (ret < 0) { + printk(KERN_INFO "btrfs: failed to relocate tree block: %i\n", -ret); if (ret != -EAGAIN || rb_node == rb_first(blocks)) err = ret; goto out; }
@@ -3770,6 +3777,7 @@ restart: ret = relocate_tree_blocks(trans, rc, &blocks); if (ret < 0) { if (ret != -EAGAIN) { + printk(KERN_INFO "btrfs: failed to relocate blocks for key %llu: %i\n", key.objectid, -ret); err = ret; break; }
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-25 23:47 UTC
[PATCH 20/20] Btrfs: don't waste metadata block groups for clustered allocation
We try to maintain about 1% of the filesystem space as free space in
data block groups, but we need not do that for metadata, since we only
allocate one block at a time. This patch also moves the adjustment of
flags that accounts for mixed data/metadata block groups into the
spinlock-protected block, before the point at which we now look at the
flags to decide whether to keep the free space buffer.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |   24 +++++++++++++-----------
 1 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3c649fe..cce452d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3228,7 +3228,7 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) static int should_alloc_chunk(struct btrfs_root *root, struct btrfs_space_info *sinfo, u64 alloc_bytes, - int force) + u64 flags, int force) { struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
@@ -3246,10 +3246,10 @@ static int should_alloc_chunk(struct btrfs_root *root, num_allocated += global_rsv->size; /* - * in limited mode, we want to have some free space up to + * in limited mode, we want to have some free data space up to * about 1% of the FS size. */ - if (force == CHUNK_ALLOC_LIMITED) { + if (force == CHUNK_ALLOC_LIMITED && (flags & BTRFS_BLOCK_GROUP_DATA)) { thresh = btrfs_super_total_bytes(root->fs_info->super_copy); thresh = max_t(u64, 64 * 1024 * 1024, div_factor_fine(thresh, 1));
@@ -3310,7 +3310,16 @@ again: return 0; } - if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { + /* + * If we have mixed data/metadata chunks we want to make sure we keep + * allocating mixed chunks instead of individual chunks. + */ + if (btrfs_mixed_space_info(space_info)) + flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); + + if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, + flags, force)) { + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; spin_unlock(&space_info->lock); return 0; } else if (space_info->chunk_alloc) {
@@ -3336,13 +3345,6 @@ again: } /* - * If we have mixed data/metadata chunks we want to make sure we keep - * allocating mixed chunks instead of individual chunks. - */ - if (btrfs_mixed_space_info(space_info)) - flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); - - /* * if we're doing a data chunk, go ahead and make sure that * we keep a reasonable number of metadata chunks allocated in the * FS as well.
-- 
1.7.4.4
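The "limited mode" threshold that the patch now applies only to data
works out to max(64MB, 1% of the filesystem). A quick stand-alone
rendition of the arithmetic, where div_factor_fine mirrors the kernel
helper of the same name (num * factor / 100) and the rest is
illustrative:

#include <stdio.h>
#include <stdint.h>

static uint64_t div_factor_fine(uint64_t num, int factor)
{
        return num * factor / 100;
}

static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

int main(void)
{
        uint64_t total_bytes = 500ULL << 30;    /* a 500 GiB filesystem */
        uint64_t thresh = max_u64(64ULL << 20,  /* 64 MiB floor */
                                  div_factor_fine(total_bytes, 1));

        printf("keep at least %llu MiB unallocated for data\n",
               (unsigned long long)(thresh >> 20));
        return 0;
}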
Alexandre Oliva
2011-Nov-26 21:19 UTC
[PATCH 12/20] Btrfs: introduce verbose debug mode for patched clustered allocation recovery
This patch adds several debug messages that helped me track down
problems in the cluster allocation logic. All the messages are
disabled by default, so that they're optimized away, but enabling the
commented-out settings of debug brings some helpful messages.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 147 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 92e640b..823ab22 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5073,6 +5073,88 @@ enum btrfs_loop_type { LOOP_NO_EMPTY_SIZE = 4, }; +/* ??? Move to free-space-cache.c? */ +static void +btrfs_dump_free_space_tree (const char *kern, int debugid, int loop, + int detailed, const char *what, const char *what2, + unsigned long long prev, struct rb_node *node) { + struct btrfs_free_space *entry; + int entries = 0, frags = 0; + unsigned long long size = 0; + unsigned long bits = 0, i, p, q; + + if (detailed) + printk("%sbtrfs %x.%i: %s %s %llx:\n", + kern, debugid, loop, what, what2, prev); + + while (node) { + entries++; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + node = rb_next(&entry->offset_index); + + size += entry->bytes; + + if (detailed) + printk("%sbtrfs %x.%i: +%llx,%llx%s\n", + kern, debugid, loop, + (long long)(entry->offset - prev), + (unsigned long long)entry->bytes, + entry->bitmap ? (detailed > 1 ? ":" : " bitmap") : ""); + + if (!entry->bitmap) + continue; + + i = 0; +#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) + do { + p = i; + i = find_next_bit (entry->bitmap, BITS_PER_BITMAP, i); + q = i; + i = find_next_zero_bit (entry->bitmap, BITS_PER_BITMAP, i); + + if (i != q) + frags++; + bits += i - q; + + if (detailed > 1) + printk("%sbtrfs %x.%i: b+%lx,%lx\n", + kern, debugid, loop, q - p, i - q); + } while (i < BITS_PER_BITMAP); +#undef BITS_PER_BITMAP + } + + if (detailed) + printk("%sbtrfs %x.%i: entries %x size %llx bits %lx frags %x\n", + kern, debugid, loop, entries, size, bits, frags); + else + printk("%sbtrfs %x.%i: %s %s %llx: e:%x s:%llx b:%lx f:%x\n", + kern, debugid, loop, what, what2, + prev, entries, size, bits, frags); +} + +static void +btrfs_dump_cluster (const char *kern, int debugid, int loop, int detailed, + const char *what, struct btrfs_free_cluster *cluster) { + spin_lock (&cluster->lock); + + btrfs_dump_free_space_tree (kern, debugid, loop, + detailed, what, "cluster", + cluster->window_start, + rb_first(&cluster->root)); + + spin_unlock (&cluster->lock); +} + +static void +btrfs_dump_block_group_free_space (const char *kern, int debugid, int loop, + int detailed, const char *what, + struct btrfs_block_group_cache *block_group) { + btrfs_dump_free_space_tree (kern, debugid, loop, + detailed, what, "block group", + block_group->key.objectid, + rb_first(&block_group->free_space_ctl->free_space_offset)); +} + /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole:
@@ -5108,6 +5190,9 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, bool have_caching_bg = false; u64 ideal_cache_percent = 0; u64 ideal_cache_offset = 0; + int debug = 0; + int debugid = 0; + static atomic_t debugcnt; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -5131,6 +5216,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, allowed_chunk_alloc = 1; if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { + /* debug = 1; */ + debugid = atomic_inc_return(&debugcnt); last_ptr = &root->fs_info->meta_alloc_cluster; if (!btrfs_test_opt(root, SSD)) empty_cluster = 64 * 1024;
@@ -5158,6 +5245,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ideal_cache: block_group = btrfs_lookup_block_group(root->fs_info, search_start); + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: ideal cache block %llx\n", + debugid, loop, + (unsigned long long)block_group->key.objectid); /* * we don't want to use the block group if it doesn't match our * allocation bits, or if its not cached.
@@ -5219,6 +5310,11 @@ search: have_block_group: cached = block_group_cache_done(block_group); + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: block group %llx c:%i\n", + debugid, loop, + (unsigned long long)search_start, + block_group->cached); if (unlikely(!cached)) { u64 free_percent;
@@ -5265,6 +5361,10 @@ alloc: if (cached && block_group->free_space_ctl->free_space < num_bytes + empty_cluster + empty_size) { + if (debug > 1 && cached) + printk(KERN_DEBUG "btrfs %x.%i: %llx is not enough space\n", + debugid, loop, + (unsigned long long)block_group->free_space_ctl->free_space); spin_unlock(&block_group->free_space_ctl->tree_lock); goto loop; }
@@ -5293,6 +5393,11 @@ alloc: goto checks; } + /* debug = 2; */ + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: failed cluster alloc\n", + debugid, loop); + spin_lock(&last_ptr->lock); /* * whoops, this cluster doesn't actually point to
@@ -5341,6 +5446,9 @@ refill_cluster: * this cluster didn't work out, free it and * start over */ + /* debug = 2; */ + if ((debug > 1 || (debug && last_ptr->block_group)) && last_ptr->window_start) + btrfs_dump_cluster(KERN_DEBUG, debugid, loop, 0, "drop", last_ptr); btrfs_return_cluster_to_free_space(NULL, last_ptr); last_ptr_loop = 0;
@@ -5351,6 +5459,8 @@ search_start, num_bytes, empty_cluster + empty_size); if (ret == 0) { + if (debug > 1 || (debug && last_ptr->block_group)) + btrfs_dump_cluster(KERN_INFO, debugid, loop, 0, "new", last_ptr); /* * now pull our allocation out of this * cluster
@@ -5363,15 +5473,23 @@ spin_unlock(&last_ptr->refill_lock); goto checks; } + if (debug) + printk(KERN_INFO "btrfs %x.%i: failed new cluster alloc\n", + debugid, loop); } else if (!cached && loop > LOOP_CACHING_NOWAIT && !failed_cluster_refill) { spin_unlock(&last_ptr->refill_lock); + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: no new cluster, caching\n", + debugid, loop); + failed_cluster_refill = true; wait_block_group_cache_progress(block_group, num_bytes + empty_cluster + empty_size); goto have_block_group; - } + } else if (cached && debug > 1) + btrfs_dump_block_group_free_space(KERN_DEBUG, debugid, loop, 1, "no cluster in", block_group); /* * at this point we either didn't find a cluster
@@ -5379,12 +5497,19 @@ * or we weren't able to allocate a block from our * cluster. Free the cluster we've been trying * to use, and go to the next block group */ + if ((debug > 1 || (debug && last_ptr->block_group)) && last_ptr->window_start) + btrfs_dump_cluster(KERN_DEBUG, debugid, loop, 0, "xdrop", last_ptr); btrfs_return_cluster_to_free_space(NULL, last_ptr); spin_unlock(&last_ptr->refill_lock); goto loop; } unclustered_alloc: + if (debug > 1 || (debug && last_ptr)) + printk(KERN_DEBUG "btrfs %x.%i: old alloc %llx+%llx\n", + debugid, loop, + (unsigned long long)num_bytes, + (unsigned long long)empty_size); offset = btrfs_find_space_for_alloc(block_group, search_start, num_bytes, empty_size); /*
@@ -5398,6 +5523,9 @@ unclustered_alloc: */ if (!offset && !failed_alloc && !cached && loop > LOOP_CACHING_NOWAIT) { + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: failed old alloc, caching\n", + debugid, loop); wait_block_group_cache_progress(block_group, num_bytes + empty_size); failed_alloc = true;
@@ -5517,10 +5645,21 @@ loop: allowed_chunk_alloc = 0; if (ret == 1) done_chunk_alloc = 1; + if (debug > 1) { + if (done_chunk_alloc) + printk(KERN_INFO "btrfs %x.%i: new chunk\n", + debugid, loop); + else + printk(KERN_INFO "btrfs %x.%i: no new chunk\n", + debugid, loop); + } } else if (!done_chunk_alloc && space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { space_info->force_alloc = CHUNK_ALLOC_LIMITED; + if (debug > 1) + printk(KERN_INFO "btrfs %x.%i: get new chunk\n", + debugid, loop); } /*
@@ -5538,8 +5677,15 @@ loop: goto search; } else if (!ins->objectid) { + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: alloc failed\n", + debugid, loop); ret = -ENOSPC; } else if (ins->objectid) { + if (debug > 1) + printk(KERN_DEBUG "btrfs %x.%i: alloced %llx\n", + debugid, loop, ins->objectid); + ret = 0; }
-- 
1.7.4.4
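The per-bitmap statistics these dump helpers print (total set bits and
fragment count) amount to alternating find-bit/find-zero scans. A
userspace rendition of the same loop, with a simplified bit scanner
standing in for the kernel's find_next_bit and find_next_zero_bit:

#include <stdio.h>

#define NBITS 64

/* return the first index >= from whose bit equals `want`, or NBITS */
static int next_with(const unsigned char *map, int from, int want)
{
        int i;

        for (i = from; i < NBITS; i++)
                if (((map[i / 8] >> (i % 8)) & 1) == want)
                        return i;
        return NBITS;
}

int main(void)
{
        unsigned char map[NBITS / 8] = { 0x0f, 0x00, 0xf0, 0x03 };
        int i = 0, frags = 0, bits = 0;

        while (i < NBITS) {
                int q = next_with(map, i, 1);   /* start of a set-bit run */
                i = next_with(map, q, 0);       /* end of that run */
                if (i != q)
                        frags++;
                bits += i - q;
        }
        printf("bits %d frags %d\n", bits, frags);      /* bits 10 frags 2 */
        return 0;
}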
Alexandre Oliva
2011-Nov-26 23:53 UTC
[PATCH 05/20] Btrfs: start search for new cluster at the beginning of the block group
Instead of starting at zero (offset is always zero), request a cluster
starting at search_start, that denotes the beginning of the current
block group.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |    6 ++----
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bc0f13d..7edb9e6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5287,10 +5287,8 @@ alloc: spin_lock(&last_ptr->refill_lock); if (last_ptr->block_group && (last_ptr->block_group->ro || - !block_group_bits(last_ptr->block_group, data))) { - offset = 0; + !block_group_bits(last_ptr->block_group, data))) goto refill_cluster; - } offset = btrfs_alloc_from_cluster(block_group, last_ptr, num_bytes, search_start);
@@ -5341,7 +5339,7 @@ refill_cluster: /* allocate a cluster in this block group */ ret = btrfs_find_space_cluster(trans, root, block_group, last_ptr, - offset, num_bytes, + search_start, num_bytes, empty_cluster + empty_size); if (ret == 0) { /*
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-27 02:05 UTC
[PATCH 09/20] Btrfs: skip allocation attempt from empty cluster
If we don't have a cluster, don't bother trying to allocate from it,
jumping right away to the attempt to allocate a new cluster.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9eec362..92e640b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5280,9 +5280,9 @@ alloc: * people trying to start a new cluster */ spin_lock(&last_ptr->refill_lock); - if (last_ptr->block_group && - (last_ptr->block_group->ro || - !block_group_bits(last_ptr->block_group, data))) + if (!last_ptr->block_group || + last_ptr->block_group->ro || + !block_group_bits(last_ptr->block_group, data)) goto refill_cluster; offset = btrfs_alloc_from_cluster(block_group, last_ptr,
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-27 22:49 UTC
[PATCH 16/20] Btrfs: try cluster but don't advance in search list
When we find an existing cluster, we switch to its block group as the
current block group, possibly skipping multiple blocks in the process.
Furthermore, under heavy contention, multiple threads may fail to
allocate from a cluster and then release just-created clusters just to
proceed to create new ones in a different block group.

This patch tries to allocate from an existing cluster regardless of
its block group, and doesn't switch to that group, instead proceeding
to try to allocate a cluster from the group it was iterating over
before the attempt.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |   76 +++++++++++++++++++++---------------------
 1 files changed, 33 insertions(+), 43 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66edda2..7064979 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5174,11 +5174,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root = orig_root->fs_info->extent_root; struct btrfs_free_cluster *last_ptr = NULL; struct btrfs_block_group_cache *block_group = NULL; + struct btrfs_block_group_cache *used_block_group; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; int done_chunk_alloc = 0; struct btrfs_space_info *space_info; - int last_ptr_loop = 0; int loop = 0; int index = 0; int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
@@ -5245,6 +5245,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ideal_cache: block_group = btrfs_lookup_block_group(root->fs_info, search_start); + used_block_group = block_group; if (debug > 1) printk(KERN_DEBUG "btrfs %x.%i: ideal cache block %llx\n", debugid, loop,
@@ -5286,6 +5287,7 @@ search: u64 offset; int cached; + used_block_group = block_group; btrfs_get_block_group(block_group); search_start = block_group->key.objectid;
@@ -5380,13 +5382,20 @@ alloc: * people trying to start a new cluster */ spin_lock(&last_ptr->refill_lock); - if (!last_ptr->block_group || - last_ptr->block_group->ro || - !block_group_bits(last_ptr->block_group, data)) + used_block_group = last_ptr->block_group; + if (used_block_group != block_group && + (!used_block_group || + used_block_group->ro || + !block_group_bits(used_block_group, data))) { + used_block_group = block_group; goto refill_cluster; + } + + if (used_block_group != block_group) + btrfs_get_block_group(used_block_group); - offset = btrfs_alloc_from_cluster(block_group, last_ptr, - num_bytes, search_start); + offset = btrfs_alloc_from_cluster(used_block_group, + last_ptr, num_bytes, used_block_group->key.objectid); if (offset) { /* we have a block, we're done */ spin_unlock(&last_ptr->refill_lock);
@@ -5398,36 +5407,15 @@ alloc: printk(KERN_DEBUG "btrfs %x.%i: failed cluster alloc\n", debugid, loop); - spin_lock(&last_ptr->lock); - /* - * whoops, this cluster doesn't actually point to - * this block group. Get a ref on the block - * group it does point to and try again - */ - if (!last_ptr_loop && last_ptr->block_group && - last_ptr->block_group != block_group && - index <= get_block_group_index(last_ptr->block_group)) { - - btrfs_put_block_group(block_group); - block_group = last_ptr->block_group; - btrfs_get_block_group(block_group); - spin_unlock(&last_ptr->lock); - spin_unlock(&last_ptr->refill_lock); - - last_ptr_loop = 1; - search_start = block_group->key.objectid; - /* - * we know this block group is properly - * in the list because - * btrfs_remove_block_group, drops the - * cluster before it removes the block - * group from the list - */ - goto have_block_group; + WARN_ON(last_ptr->block_group != used_block_group); + if (used_block_group != block_group) { + btrfs_put_block_group(used_block_group); + used_block_group = block_group; } - spin_unlock(&last_ptr->lock); + refill_cluster: + BUG_ON(used_block_group != block_group); + /* If we are on LOOP_NO_EMPTY_SIZE, we can't * set up a new clusters, so lets just skip it * and let the allocator find whatever block
@@ -5451,8 +5439,6 @@ btrfs_dump_cluster(KERN_DEBUG, debugid, loop, 0, "drop", last_ptr); btrfs_return_cluster_to_free_space(NULL, last_ptr); - last_ptr_loop = 0; - /* allocate a cluster in this block group */ ret = btrfs_find_space_cluster(trans, root, block_group, last_ptr,
@@ -5539,26 +5525,26 @@ checks: search_start = stripe_align(root, offset); /* move on to the next group */ if (search_start + num_bytes >= search_end) { - btrfs_add_free_space(block_group, offset, num_bytes); + btrfs_add_free_space(used_block_group, offset, num_bytes); goto loop; } /* move on to the next group */ if (search_start + num_bytes > - block_group->key.objectid + block_group->key.offset) { - btrfs_add_free_space(block_group, offset, num_bytes); + used_block_group->key.objectid + used_block_group->key.offset) { + btrfs_add_free_space(used_block_group, offset, num_bytes); goto loop; } if (offset < search_start) - btrfs_add_free_space(block_group, offset, + btrfs_add_free_space(used_block_group, offset, search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_update_reserved_bytes(block_group, num_bytes, + ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, alloc_type); if (ret == -EAGAIN) { - btrfs_add_free_space(block_group, offset, num_bytes); + btrfs_add_free_space(used_block_group, offset, num_bytes); goto loop; }
@@ -5567,15 +5553,19 @@ checks: ins->offset = num_bytes; if (offset < search_start) - btrfs_add_free_space(block_group, offset, + btrfs_add_free_space(used_block_group, offset, search_start - offset); BUG_ON(offset > search_start); + if (used_block_group != block_group) + btrfs_put_block_group(used_block_group); btrfs_put_block_group(block_group); break; loop: failed_cluster_refill = false; failed_alloc = false; BUG_ON(index != get_block_group_index(block_group)); + if (used_block_group != block_group) + btrfs_put_block_group(used_block_group); btrfs_put_block_group(block_group); } up_read(&space_info->groups_sem);
-- 
1.7.4.4
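The refcounting discipline the patch introduces is worth spelling out
in isolation: when allocation borrows the cluster's block group
(used_block_group), take an extra reference, and drop it on every exit
path where it differs from the group being iterated. A toy sketch with
plain counters standing in for the kernel's block group refcounts (all
names illustrative):

#include <stdio.h>

struct block_group { int refs; const char *name; };

static void get_bg(struct block_group *bg) { bg->refs++; }
static void put_bg(struct block_group *bg) { bg->refs--; }

static void try_alloc(struct block_group *bg, struct block_group *cluster_bg)
{
        struct block_group *used = cluster_bg ? cluster_bg : bg;

        if (used != bg)
                get_bg(used);   /* borrow the cluster's group */

        /* ... attempt the allocation from `used` ... */

        if (used != bg)
                put_bg(used);   /* balanced on every exit path */
}

int main(void)
{
        struct block_group a = { 1, "iterated" }, b = { 1, "cluster" };

        try_alloc(&a, &b);
        printf("%s refs=%d, %s refs=%d\n", a.name, a.refs, b.name, b.refs);
        return 0;
}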
Alexandre Oliva
2011-Nov-28 00:07 UTC
[PATCH 08/20] Btrfs: try to allocate from cluster even at LOOP_NO_EMPTY_SIZE
If we reach LOOP_NO_EMPTY_SIZE, we won't even try to use a cluster
that others might have set up. Odds are that there won't be one, but
if someone else succeeded in setting it up, we might as well use it,
even if we don't try to set up a cluster again.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |   26 ++++++++++++++++++--------
 1 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 24eef3a..9eec362 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5271,15 +5271,10 @@ alloc: spin_unlock(&block_group->free_space_ctl->tree_lock); /* - * Ok we want to try and use the cluster allocator, so lets look - * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will - * have tried the cluster allocator plenty of times at this - * point and not have found anything, so we are likely way too - * fragmented for the clustering stuff to find anything, so lets - * just skip it and let the allocator find whatever block it can - * find + * Ok we want to try and use the cluster allocator, so + * lets look there */ - if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { + if (last_ptr) { /* * the refill lock keeps out other * people trying to start a new cluster
@@ -5328,6 +5323,20 @@ alloc: } spin_unlock(&last_ptr->lock); refill_cluster: + /* If we are on LOOP_NO_EMPTY_SIZE, we can't + * set up a new clusters, so lets just skip it + * and let the allocator find whatever block + * it can find. If we reach this point, we + * will have tried the cluster allocator + * plenty of times and not have found + * anything, so we are likely way too + * fragmented for the clustering stuff to find + * anything. */ + if (loop >= LOOP_NO_EMPTY_SIZE) { + spin_unlock(&last_ptr->refill_lock); + goto unclustered_alloc; + } + /* * this cluster didn't work out, free it and * start over
@@ -5375,6 +5384,7 @@ refill_cluster: goto loop; } +unclustered_alloc: offset = btrfs_find_space_for_alloc(block_group, search_start, num_bytes, empty_size); /*
-- 
1.7.4.4
Alexandre Oliva
[PATCH 15/20] Btrfs: activate allocation debugging
Activate various messages that help track down clustered allocation
problems, that are disabled and optimized out by default.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 823ab22..66edda2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5216,7 +5216,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, allowed_chunk_alloc = 1; if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { - /* debug = 1; */ + debug = 1; debugid = atomic_inc_return(&debugcnt); last_ptr = &root->fs_info->meta_alloc_cluster; if (!btrfs_test_opt(root, SSD))
@@ -5393,7 +5393,7 @@ alloc: goto checks; } - /* debug = 2; */ + debug = 2; if (debug > 1) printk(KERN_DEBUG "btrfs %x.%i: failed cluster alloc\n", debugid, loop);
@@ -5446,7 +5446,7 @@ refill_cluster: * this cluster didn't work out, free it and * start over */ - /* debug = 2; */ + debug = 2; if ((debug > 1 || (debug && last_ptr->block_group)) && last_ptr->window_start) btrfs_dump_cluster(KERN_DEBUG, debugid, loop, 0, "drop", last_ptr); btrfs_return_cluster_to_free_space(NULL, last_ptr);
-- 
1.7.4.4
Alexandre Oliva
[PATCH 02/20] Btrfs: initialize new bitmaps' list
We're failing to create clusters with bitmaps because
setup_cluster_no_bitmap checks that the list is empty before inserting
the bitmap entry in the list for setup_cluster_bitmap, but the list
field is only initialized when it is restored from the on-disk free
space cache, or when it is written out to disk.

Besides a potential race condition due to the multiple use of the list
field, filesystem performance severely degrades over time: as we use
up all non-bitmap free extents, the try-to-set-up-cluster dance is
done at every metadata block allocation. For every block group, we
fail to set up a cluster, and after failing on them all up to twice,
we fall back to the much slower unclustered allocation.

To make matters worse, before the unclustered allocation, we try to
create new block groups until we reach the 1% threshold, which
introduces additional bitmaps and thus block groups that we'll iterate
over at each metadata block request.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/free-space-cache.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6e5b7e4..ff179b1 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1470,6 +1470,7 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, { info->offset = offset_to_bitmap(ctl, offset); info->bytes = 0; + INIT_LIST_HEAD(&info->list); link_free_space(ctl, info); ctl->total_bitmaps++;
-- 
1.7.4.4
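To see why the missing INIT_LIST_HEAD made fresh bitmaps look busy,
consider a minimal userspace rendition of the kernel's list
primitives: the entry is allocated zeroed, so list_empty() is false
until the node is self-linked, and a freshly created bitmap is
therefore skipped as if it were already on a list.

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define INIT_LIST_HEAD(h) do { (h)->next = (h); (h)->prev = (h); } while (0)

/* kernel semantics: a list head is empty when it points at itself */
static int list_empty(const struct list_head *h)
{
        return h->next == h;
}

struct free_space {
        struct list_head list;
        /* offset, bytes, bitmap ... */
};

int main(void)
{
        struct free_space info = { { 0, 0 } };  /* zeroed, as from zalloc */

        /* prints 0: the uninitialized entry looks busy, not empty */
        printf("zeroed entry empty? %d\n", list_empty(&info.list));
        INIT_LIST_HEAD(&info.list);
        printf("after INIT_LIST_HEAD: %d\n", list_empty(&info.list));
        return 0;
}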
Alexandre Oliva
2011-Nov-28 14:07 UTC
[PATCH 04/20] Btrfs: reset cluster's max_size when creating bitmap cluster
The field that indicates the size of the largest contiguous chunk of
free space in the cluster is not initialized when setting up bitmaps;
it's only increased when we find a larger contiguous chunk. We end up
retaining a larger value than appropriate for highly-fragmented
clusters, which may cause pointless searches for large contiguous
groups, and even cause clusters that do not meet the density
requirements to be set up.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/free-space-cache.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ff179b1..ec23d43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2320,6 +2320,7 @@ again: if (!found) { start = i; + cluster->max_size = 0; found = true; }
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-28 14:22 UTC
[PATCH 11/20] Btrfs: note when a bitmap is skipped because its list is in use
Bitmap lists serve two purposes: recording the order of loading/saving
on-disk free space caches, and setting up a list of bitmaps to try to
set up a cluster. Complain if a list is unexpectedly busy.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/free-space-cache.c |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ec23d43..dd7fe43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -904,6 +904,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, goto out_nospc; if (e->bitmap) { + BUG_ON(!list_empty(&e->list)); list_add_tail(&e->list, &bitmap_list); bitmaps++; }
@@ -2380,6 +2381,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, while (entry->bitmap) { if (list_empty(&entry->list)) list_add_tail(&entry->list, bitmaps); + else if (entry->bitmap) + printk(KERN_ERR "btrfs: not using (busy?!?) bitmap %lli\n", + (unsigned long long)entry->offset); node = rb_next(&entry->offset_index); if (!node) return -ENOSPC;
@@ -2402,6 +2406,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, if (entry->bitmap) { if (list_empty(&entry->list)) list_add_tail(&entry->list, bitmaps); + else + printk(KERN_ERR "btrfs: not using (busy?!?) bitmap %lli\n", + (unsigned long long)entry->offset); continue; }
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-28 14:23 UTC
[PATCH 19/20] Btrfs: log when a bitmap is rejected for a cluster
---
 fs/btrfs/free-space-cache.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 953f7dd..0151274 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2316,6 +2316,16 @@ again: i = next_zero; } + if (!found_bits && total_found) + printk(KERN_INFO "btrfs: bitmap %llx want:%llx min:%llx cont:%llx start:%llx max:%llx total:%llx\n", + (unsigned long long)entry->offset, + (unsigned long long)bytes, + (unsigned long long)min_bytes, + (unsigned long long)cont1_bytes, + (unsigned long long)(start * block_group->sectorsize), + (unsigned long long)cluster->max_size, + (unsigned long long)(total_found * block_group->sectorsize)); + if (!found_bits) return -ENOSPC;
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-28 14:30 UTC
[PATCH 06/20] Btrfs: skip block groups without enough space for a cluster
We test whether a block group has enough free space to hold the
requested block, but when we're doing clustered allocation, we can
save some cycles by testing whether it has enough room for the cluster
upfront, otherwise we end up attempting to set up a cluster and
failing. Only in the NO_EMPTY_SIZE loop do we attempt an unclustered
allocation, and by then we'll have zeroed the cluster size, so this
patch won't stop us from using the block group as a last resort.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7edb9e6..525ff20 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5264,7 +5264,7 @@ alloc: spin_lock(&block_group->free_space_ctl->tree_lock); if (cached && block_group->free_space_ctl->free_space < - num_bytes + empty_size) { + num_bytes + empty_cluster + empty_size) { spin_unlock(&block_group->free_space_ctl->tree_lock); goto loop; }
-- 
1.7.4.4
Alexandre Oliva
2011-Nov-28 14:36 UTC
[PATCH 07/20] Btrfs: don't set up allocation result twice
We store the allocation start and length twice in ins, once right
after the other, but with intervening calls that may prevent the
duplicate from being optimized out by the compiler. Remove one of the
assignments.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
---
 fs/btrfs/extent-tree.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 525ff20..24eef3a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5412,9 +5412,6 @@ checks: goto loop; } - ins->objectid = search_start; - ins->offset = num_bytes; - if (offset < search_start) btrfs_add_free_space(block_group, offset, search_start - offset);
-- 
1.7.4.4
---
 fs/btrfs/extent-tree.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5d86877..bc0f13d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5304,7 +5304,7 @@ alloc:
 		/*
 		 * whoops, this cluster doesn't actually point to
 		 * this block group.  Get a ref on the block
-		 * group is does point to and try again
+		 * group it does point to and try again
 		 */
 		if (!last_ptr_loop && last_ptr->block_group &&
 		    last_ptr->block_group != block_group &&
-- 
1.7.4.4
The first 11 patches are relatively simple fixes or improvements that
I suppose could make it even into 3.2 (02 is particularly essential,
to avoid progressive performance degradation and metadata space waste
in the default clustered allocation strategy).

Patch 12 and its complement 15, and also 19, are debugging aids that
helped me track down the problem fixed in 02.

Patch 13 is a revised version of the larger-clusters patch I posted
before, adding a micro-optimization of the bitmap computations to the
earlier version.

Patches 14 to 20 are probably not suitable for inclusion, and are
provided only for reference, although I'm still undecided on 16: it
seems to me to make sense to stick to the ordered list and index
instead of jumping to the current cluster's block group, but it may
also make sense performance-wise to start at the current cluster and
advance from there.  We still do that, as long as we find a cluster
to begin with, but I'm yet to double-check the race that causes
multiple subsequent releases/creations of clusters under heavy load.
I'm sure I saw it, and I no longer do, but now I'm no longer sure
whether this is the patch that fixed it, or about the details of how
we came about that scenario.

Patches 14, 17, 18 and 20 were posted before, and I'm probably
dropping them from future patchsets unless I find them to be still
useful.

Alexandre Oliva (20):
  Btrfs: enable removal of second disk with raid1 metadata
  Btrfs: initialize new bitmaps' list
  Btrfs: fix comment typo
  Btrfs: reset cluster's max_size when creating bitmap cluster
  Btrfs: start search for new cluster at the beginning of the block
    group
  Btrfs: skip block groups without enough space for a cluster
  Btrfs: don't set up allocation result twice
  Btrfs: try to allocate from cluster even at LOOP_NO_EMPTY_SIZE
  Btrfs: skip allocation attempt from empty cluster
  Btrfs: report reason for failed relocation
  Btrfs: note when a bitmap is skipped because its list is in use
  Btrfs: introduce verbose debug mode for patched clustered allocation
    recovery
  Btrfs: revamp clustered allocation logic
  Btrfs: introduce option to rebalance only metadata
  Btrfs: activate allocation debugging
  Btrfs: try cluster but don't advance in search list
  Btrfs: introduce -o cluster and -o nocluster
  Btrfs: add -o mincluster option
  Btrfs: log when a bitmap is rejected for a cluster
  Btrfs: don't waste metadata block groups for clustered allocation

 fs/btrfs/ctree.h            |    3 +-
 fs/btrfs/extent-tree.c      |  297 ++++++++++++++++++++++++++++++-----------
 fs/btrfs/free-space-cache.c |  132 ++++++++++---------
 fs/btrfs/ioctl.c            |    2 +
 fs/btrfs/ioctl.h            |    3 +
 fs/btrfs/relocation.c       |    8 +
 fs/btrfs/super.c            |   31 ++++-
 fs/btrfs/volumes.c          |   39 +++++-
 fs/btrfs/volumes.h          |    1 +
 9 files changed, 369 insertions(+), 147 deletions(-)
-- 
1.7.4.4
Alexandre Oliva wrote:
> The first 11 patches are relatively simple fixes or improvements that
> I suppose could make it even into 3.2 (02 is particularly essential,
> to avoid progressive performance degradation and metadata space waste
> in the default clustered allocation strategy).

I think 02 (especially!) and 04 are good candidates for 3.2, and the
others all look like improvements to me that can wait until the next
merge window.

> Patch 12 and its complement 15, and also 19, are debugging aids that
> helped me track down the problem fixed in 02.
>
> Patch 13 is a revised version of the larger-clusters patch I posted
> before, adding a micro-optimization of the bitmap computations to the
> earlier version.
>
> Patches 14 to 20 are probably not suitable for inclusion, and are
> provided only for reference, although I'm still undecided on 16: it
> seems to me to make sense to stick to the ordered list and index
> instead of jumping to the current cluster's block group, but it may
> also make sense performance-wise to start at the current cluster and
> advance from there.  We still do that, as long as we find a cluster
> to begin with, but I'm yet to double-check the race that causes
> multiple subsequent releases/creations of clusters under heavy load.
> I'm sure I saw it, and I no longer do, but now I'm no longer sure
> whether this is the patch that fixed it, or about the details of how
> we came about that scenario.
>
> Patches 14, 17, 18 and 20 were posted before, and I'm probably
> dropping them from future patchsets unless I find them to be still
> useful.
Christian Brunner
2011-Nov-29 21:00 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
2011/11/28 Alexandre Oliva <oliva@lsd.ic.unicamp.br>:
> We're failing to create clusters with bitmaps because
> setup_cluster_no_bitmap checks that the list is empty before inserting
> the bitmap entry in the list for setup_cluster_bitmap, but the list
> field is only initialized when it is restored from the on-disk free
> space cache, or when it is written out to disk.
>
> Besides a potential race condition due to the multiple use of the list
> field, filesystem performance severely degrades over time: as we use
> up all non-bitmap free extents, the try-to-set-up-cluster dance is
> done at every metadata block allocation.  For every block group, we
> fail to set up a cluster, and after failing on them all up to twice,
> we fall back to the much slower unclustered allocation.

This matches exactly what I've been observing in our ceph cluster.

I've now installed your patches (1-11) on two servers. The cluster
setup problem seems to be gone - a big thanks for that!

However, another thing is causing me some headache: when I'm doing
heavy reading in our ceph cluster, the load and wait-io on the patched
servers is higher than on the unpatched ones.

Dstat from an unpatched server:

----total-cpu-usage---- -dsk/total- -net/total- ---paging-- ---system--
usr sys idl wai hiq siq| read  writ| recv  send|  in   out | int   csw
  1   6  83   8   0   1|  22M  348k| 336k   93M|   0     0 |8445  3715
  1   5  87   7   0   1|  12M 1808k| 214k   65M|   0     0 |5461  1710
  1   3  85  10   0   0|  11M  640k| 313k   49M|   0     0 |5919  2853
  1   6  84   9   0   1|  12M  608k| 358k   69M|   0     0 |7406  3645
  1   7  78  13   0   1|  15M 5344k| 348k  105M|   0     0 |9765  4403
  1   7  80  10   0   1|  22M 1368k| 358k   89M|   0     0 |8036  3202
  1   9  72  16   0   1|  22M 2424k| 646k  137M|   0     0 | 12k  5527

Dstat from a patched server:

----total-cpu-usage---- -dsk/total- -net/total- ---paging-- ---system--
usr sys idl wai hiq siq| read  writ| recv  send|  in   out | int   csw
  1   2  61  35   0   0|2500k 2736k| 141k   34M|   0     0 |4415  1603
  1   4  48  47   0   1|  10M 3924k| 353k   61M|   0     0 |6871  3771
  1   5  55  38   0   1|  10M 1728k| 385k   92M|   0     0 |8030  2617
  2   8  69  20   0   1|  18M 1384k| 435k  130M|   0     0 | 10k  4493
  1   5  85   8   0   1|7664k   84k| 287k   97M|   0     0 |6231  1357
  1   3  91   5   0   0|  10M  144k| 194k   44M|   0     0 |3807  1081
  1   7  66  25   0   1|  20M 1248k| 404k  101M|   0     0 |8676  3632
  0   3  38  58   0   0|8104k 2660k| 176k   40M|   0     0 |4841  2093

This seems to be coming from "btrfs-endio-1", a kernel thread that has
not caught my attention on unpatched systems yet.

I did some tracing on that process with ftrace and I can see that the
time is spent in end_bio_extent_readpage(). In a single call to
end_bio_extent_readpage(), the functions unlock_extent_cached(),
unlock_page() and btrfs_readpage_end_io_hook() are invoked 128 times
(each).

Do you have any idea what's going on here? (Please note that the
filesystem is still unmodified - metadata overhead is large.)

Thanks,
Christian
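A note for readers puzzling over those numbers: a single read bio
completes many pages at once, and the endio worker does per-page work
for each of them, so 128 calls per invocation is consistent with one
512KB bio carrying 128 4KB pages. Below is a greatly simplified sketch
of the loop structure; the names follow the 3.x-era
fs/btrfs/extent_io.c, but details are elided and the checksum hook is
called directly for brevity, so treat it as an illustration rather
than the kernel source.

/* Illustration only; not the actual end_bio_extent_readpage(). */
static void end_bio_extent_readpage_sketch(struct bio *bio, int err)
{
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {	/* one pass per page */
		struct page *page = bio->bi_io_vec[i].bv_page;
		struct extent_io_tree *tree =
			&BTRFS_I(page->mapping->host)->io_tree;
		u64 start = page_offset(page);
		u64 end = start + PAGE_CACHE_SIZE - 1;

		/* per-page checksum verification */
		btrfs_readpage_end_io_hook(page, start, end, NULL);
		/* drop the extent-range lock taken when the read was issued */
		unlock_extent_cached(tree, start, end, NULL, GFP_ATOMIC);
		/* wake anyone waiting on the page itself */
		unlock_page(page);
	}
	bio_put(bio);
}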
Alexandre Oliva
2011-Nov-30 23:28 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
On Nov 29, 2011, Christian Brunner <chb@muc.de> wrote:

> When I'm doing heavy reading in our ceph cluster, the load and wait-io
> on the patched servers is higher than on the unpatched ones.

That's unexpected.

> This seems to be coming from "btrfs-endio-1", a kernel thread that has
> not caught my attention on unpatched systems yet.

I suppose I could wave my hands while explaining that you're getting
higher data throughput, so it's natural that it would take up more
resources, but that explanation doesn't satisfy me.  I suppose
allocation might have got slightly more CPU intensive in some cases,
as we now use bitmaps where before we'd only use the
cheaper-to-allocate extents.  But that's unsatisfying as well.

> Do you have any idea what's going on here?

Sorry, not really.

> (Please note that the filesystem is still unmodified - metadata
> overhead is large.)

Speaking of metadata overhead, I found out that the bitmap-enabling
patch is not enough for a metadata balance to get rid of excess
metadata block groups.  I had to apply patch #16 to get it working
again.

It sort of makes sense: without patch 16, too often will we get to the
end of the list of metadata block groups and advance from
LOOP_FIND_IDEAL to LOOP_CACHING_WAIT (skipping NOWAIT after we've
cached free space for all block groups), and if we get to the end of
that loop as well (how? I couldn't quite figure out, but it only seems
to happen under high contention) we'll advance to LOOP_ALLOC_CHUNK and
end up unnecessarily allocating a new chunk.  Patch 16 makes sure we
don't jump ahead during LOOP_CACHING_WAIT, so we won't get new chunks
unless they can really help us keep the system going.

-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist      Red Hat Brazil Compiler Engineer
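For readers without the source at hand, the loop stages named above
form a fallback ladder in find_free_extent() in fs/btrfs/extent-tree.c.
The sketch below paraphrases the 3.1/3.2-era enum; the comments are
approximations, not quotes from the source.

/* Paraphrase of the allocator fallback ladder, circa 3.1/3.2. */
enum {
	LOOP_FIND_IDEAL = 0,	/* search only the hinted block group */
	LOOP_CACHING_NOWAIT,	/* use only block groups already cached */
	LOOP_CACHING_WAIT,	/* wait for free-space caching to finish */
	LOOP_ALLOC_CHUNK,	/* allocate a new chunk, then retry */
	LOOP_NO_EMPTY_SIZE,	/* last resort: no cluster requirements */
};

Each failed pass over all block groups bumps the loop counter one rung
up the ladder, which is why falling off the end of LOOP_CACHING_WAIT,
as described above, escalates straight into allocating a fresh chunk.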
Christian Brunner
2011-Dec-01 14:50 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
2011/12/1 Alexandre Oliva <oliva@lsd.ic.unicamp.br>:
> On Nov 29, 2011, Christian Brunner <chb@muc.de> wrote:
>
>> When I'm doing heavy reading in our ceph cluster, the load and
>> wait-io on the patched servers is higher than on the unpatched ones.
>
> That's unexpected.
>
>> This seems to be coming from "btrfs-endio-1", a kernel thread that
>> has not caught my attention on unpatched systems yet.
>
> I suppose I could wave my hands while explaining that you're getting
> higher data throughput, so it's natural that it would take up more
> resources, but that explanation doesn't satisfy me.  I suppose
> allocation might have got slightly more CPU intensive in some cases,
> as we now use bitmaps where before we'd only use the
> cheaper-to-allocate extents.  But that's unsatisfying as well.

I must admit that I don't completely understand the difference
between bitmaps and extents.

From what I see on my servers, I can tell that the degradation over
time is gone. (Rebooting the servers every day is no longer needed.
This is a real plus.) But the performance compared to a freshly
booted, unpatched server is much slower with my ceph workload.

I wonder if it would make sense to initialize the list field only
when the cluster setup fails? This would avoid the fallback to the
much slower unclustered allocation and would give us the
cheaper-to-allocate extents.

Regards,
Christian
Christian Brunner
2011-Dec-07 20:50 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
2011/12/1 Christian Brunner <chb@muc.de>:
> 2011/12/1 Alexandre Oliva <oliva@lsd.ic.unicamp.br>:
>> On Nov 29, 2011, Christian Brunner <chb@muc.de> wrote:
>>
>>> When I'm doing heavy reading in our ceph cluster, the load and
>>> wait-io on the patched servers is higher than on the unpatched ones.
>>
>> That's unexpected.

In the meantime I know that it's not related to the reads.

>> I suppose I could wave my hands while explaining that you're getting
>> higher data throughput, so it's natural that it would take up more
>> resources, but that explanation doesn't satisfy me.  I suppose
>> allocation might have got slightly more CPU intensive in some cases,
>> as we now use bitmaps where before we'd only use the
>> cheaper-to-allocate extents.  But that's unsatisfying as well.
>
> I must admit that I don't completely understand the difference
> between bitmaps and extents.
>
> From what I see on my servers, I can tell that the degradation over
> time is gone. (Rebooting the servers every day is no longer needed.
> This is a real plus.) But the performance compared to a freshly
> booted, unpatched server is much slower with my ceph workload.
>
> I wonder if it would make sense to initialize the list field only
> when the cluster setup fails? This would avoid the fallback to the
> much slower unclustered allocation and would give us the
> cheaper-to-allocate extents.

I've now tried various combinations of your patches and I can really
nail it down to this one line.

With this patch applied I get much higher write-io values than
without it. Some of the other patches help to reduce the effect, but
it's still significant.

iostat on an unpatched node is giving me:

Device:  rrqm/s  wrqm/s    r/s    w/s   rsec/s  wsec/s avgrq-sz avgqu-sz  await  svctm  %util
sda      105.90    0.37  15.42  14.48  2657.33  560.13   107.61     1.89  62.75   6.26  18.71

while on a node with this patch it's:

sda      128.20    0.97  11.10  57.15  3376.80  552.80    57.58    20.58 296.33   4.16  28.36

Also interesting is the fact that the average request size on the
patched node is much smaller (avgrq-sz is in 512-byte sectors, so
roughly 54KB per request versus 29KB).

Josef was telling me that this could be related to the number of
bitmaps we write out, but I've no idea how to trace this.

I would be very happy if someone could give me a hint on what to do
next, as this is one of the last remaining issues with our ceph
cluster.

Thanks,
Christian
Christian Brunner
2011-Dec-09 15:17 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
2011/12/7 Christian Brunner <chb@muc.de>:
> 2011/12/1 Christian Brunner <chb@muc.de>:
>> 2011/12/1 Alexandre Oliva <oliva@lsd.ic.unicamp.br>:
>>> On Nov 29, 2011, Christian Brunner <chb@muc.de> wrote:
>>>
>>>> When I'm doing heavy reading in our ceph cluster, the load and
>>>> wait-io on the patched servers is higher than on the unpatched ones.
>>>
>>> That's unexpected.
>
> In the meantime I know that it's not related to the reads.
>
>>> I suppose I could wave my hands while explaining that you're getting
>>> higher data throughput, so it's natural that it would take up more
>>> resources, but that explanation doesn't satisfy me.  I suppose
>>> allocation might have got slightly more CPU intensive in some cases,
>>> as we now use bitmaps where before we'd only use the
>>> cheaper-to-allocate extents.  But that's unsatisfying as well.
>>
>> I must admit that I don't completely understand the difference
>> between bitmaps and extents.
>>
>> From what I see on my servers, I can tell that the degradation over
>> time is gone. (Rebooting the servers every day is no longer needed.
>> This is a real plus.) But the performance compared to a freshly
>> booted, unpatched server is much slower with my ceph workload.
>>
>> I wonder if it would make sense to initialize the list field only
>> when the cluster setup fails? This would avoid the fallback to the
>> much slower unclustered allocation and would give us the
>> cheaper-to-allocate extents.
>
> I've now tried various combinations of your patches and I can really
> nail it down to this one line.
>
> With this patch applied I get much higher write-io values than
> without it. Some of the other patches help to reduce the effect, but
> it's still significant.
>
> iostat on an unpatched node is giving me:
>
> Device:  rrqm/s  wrqm/s    r/s    w/s   rsec/s  wsec/s avgrq-sz avgqu-sz  await  svctm  %util
> sda      105.90    0.37  15.42  14.48  2657.33  560.13   107.61     1.89  62.75   6.26  18.71
>
> while on a node with this patch it's:
>
> sda      128.20    0.97  11.10  57.15  3376.80  552.80    57.58    20.58 296.33   4.16  28.36
>
> Also interesting is the fact that the average request size on the
> patched node is much smaller.
>
> Josef was telling me that this could be related to the number of
> bitmaps we write out, but I've no idea how to trace this.
>
> I would be very happy if someone could give me a hint on what to do
> next, as this is one of the last remaining issues with our ceph
> cluster.

This is still bugging me, and I just remembered something that might
be helpful. (I also hope that this is not misleading...)

Back in 2.6.38 we were running ceph without btrfs performance
degradation. I found a thread on the list where similar problems were
reported:

http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg10346.html

In that thread someone bisected the issue to:

From 4e69b598f6cfb0940b75abf7e179d6020e94ad1e Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 21 Mar 2011 10:11:24 -0400
Subject: [PATCH] Btrfs: cleanup how we setup free space clusters

In this commit the bitmap handling was changed, so I just thought
that this may be related.

I'm still hoping that someone with a deeper understanding of btrfs
could take a look at this.

Thanks,
Christian
Alexandre Oliva
2011-Dec-12 07:47 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
On Dec 7, 2011, Christian Brunner <chb@muc.de> wrote:

> With this patch applied I get much higher write-io values than
> without it. Some of the other patches help to reduce the effect, but
> it's still significant.
>
> iostat on an unpatched node is giving me:
>
> Device:  rrqm/s  wrqm/s    r/s    w/s   rsec/s  wsec/s avgrq-sz avgqu-sz  await  svctm  %util
> sda      105.90    0.37  15.42  14.48  2657.33  560.13   107.61     1.89  62.75   6.26  18.71
>
> while on a node with this patch it's:
>
> sda      128.20    0.97  11.10  57.15  3376.80  552.80    57.58    20.58 296.33   4.16  28.36
>
> Also interesting is the fact that the average request size on the
> patched node is much smaller.

That's probably expected for writes, as bitmaps are expected to be
more fragmented, even if used only for metadata (or are you on SSD?).

Bitmaps are just a different in-memory (and on-disk-cache, if enabled)
representation of free space, one that can be far more compact: one
bit per disk block, rather than an extent list entry.  They're
interchangeable otherwise; it's just that searching bitmaps for a free
block (bit) is somewhat more expensive than taking the next entry from
a list, but you don't want to use up too much memory with long lists
of e.g. single-block free extents.

-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist      Red Hat Brazil Compiler Engineer
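To put rough numbers on that trade-off, here is a back-of-the-envelope
comparison. It is an illustration of the arithmetic, not btrfs code:
the 4KB block size, the 128MB region per bitmap and the two-u64 extent
entry are simplifying assumptions (real free-space entries also carry
rb-tree and list linkage).

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t block = 4096;		/* assumed block size */
	const uint64_t region = 128ULL << 20;	/* assumed 128MB per bitmap */
	const uint64_t extent_entry = 2 * sizeof(uint64_t); /* offset+bytes */

	/* one bit per block, so the bitmap size is fixed */
	uint64_t bitmap_bytes = region / block / 8;
	/* worst case for the extent list: alternating used/free blocks */
	uint64_t free_extents = region / block / 2;
	uint64_t list_bytes = free_extents * extent_entry;

	printf("bitmap:      %llu bytes\n",
	       (unsigned long long)bitmap_bytes);
	printf("extent list: %llu bytes (at %llu single-block free extents)\n",
	       (unsigned long long)list_bytes,
	       (unsigned long long)free_extents);
	return 0;
}

Under those assumptions the bitmap costs a flat 4KB no matter how
fragmented the region gets, while the extent list in the alternating
worst case grows to 256KB; that is the memory blow-up the last
sentence above warns about.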
Christian Brunner
2011-Dec-12 08:05 UTC
Re: [PATCH 02/20] Btrfs: initialize new bitmaps' list
2011/12/12 Alexandre Oliva <oliva@lsd.ic.unicamp.br>:
> On Dec 7, 2011, Christian Brunner <chb@muc.de> wrote:
>
>> With this patch applied I get much higher write-io values than
>> without it. Some of the other patches help to reduce the effect,
>> but it's still significant.
>>
>> iostat on an unpatched node is giving me:
>>
>> Device:  rrqm/s  wrqm/s    r/s    w/s   rsec/s  wsec/s avgrq-sz avgqu-sz  await  svctm  %util
>> sda      105.90    0.37  15.42  14.48  2657.33  560.13   107.61     1.89  62.75   6.26  18.71
>>
>> while on a node with this patch it's:
>>
>> sda      128.20    0.97  11.10  57.15  3376.80  552.80    57.58    20.58 296.33   4.16  28.36
>>
>> Also interesting is the fact that the average request size on the
>> patched node is much smaller.
>
> That's probably expected for writes, as bitmaps are expected to be
> more fragmented, even if used only for metadata (or are you on SSD?).

It's a traditional hardware RAID5 with spinning disks. I would accept
this if the writes started right after the mount, but in this case it
takes a few hours until the writes increase. That's why I'm almost
certain that something is still wrong.

> Bitmaps are just a different in-memory (and on-disk-cache, if
> enabled) representation of free space, one that can be far more
> compact: one bit per disk block, rather than an extent list entry.
> They're interchangeable otherwise; it's just that searching bitmaps
> for a free block (bit) is somewhat more expensive than taking the
> next entry from a list, but you don't want to use up too much memory
> with long lists of e.g. single-block free extents.

Thanks for the explanation! I'll try to insert some debugging code
once my test server is ready.

Christian