Hello,
This patch adds reserved extents accounting. This helps the allocator
choose the block group from which free extents are allocated.
Regards
Yan Zheng
---
diff -r b5babeda93fa ctree.h
--- a/ctree.h	Tue Sep 09 02:16:12 2008 +0800
+++ b/ctree.h	Tue Sep 09 02:16:20 2008 +0800
@@ -499,6 +499,7 @@
 	u64 total_bytes;
 	u64 bytes_used;
 	u64 bytes_pinned;
+	u64 bytes_reserved;
 	int full;
 	int force_alloc;
 	struct list_head list;
@@ -510,6 +511,7 @@
 	struct btrfs_space_info *space_info;
 	spinlock_t lock;
 	u64 pinned;
+	u64 reserved;
 	u64 flags;
 	int cached;
 	int ro;
diff -r b5babeda93fa extent-tree.c
--- a/extent-tree.c	Tue Sep 09 02:16:12 2008 +0800
+++ b/extent-tree.c	Tue Sep 09 02:16:20 2008 +0800
@@ -374,7 +374,6 @@
 	u64 last = 0;
 	u64 start;
 	u64 end;
-	u64 free_check;
 	u64 ptr;
 	int bit;
 	int ret;
@@ -385,7 +384,7 @@
 	block_group_cache = &info->block_group_cache;
 
 	if (data & BTRFS_BLOCK_GROUP_METADATA)
-		factor = 9;
+		factor = 8;
 
 	bit = block_group_state_bits(data);
 
@@ -395,7 +394,7 @@
 		if (shint && block_group_bits(shint, data) && !shint->ro) {
 			spin_lock(&shint->lock);
 			used = btrfs_block_group_used(&shint->item);
-			if (used + shint->pinned <
+			if (used + shint->pinned + shint->reserved <
 			    div_factor(shint->key.offset, factor)) {
 				spin_unlock(&shint->lock);
 				return shint;
@@ -406,7 +405,7 @@
 	if (hint && !hint->ro && block_group_bits(hint, data)) {
 		spin_lock(&hint->lock);
 		used = btrfs_block_group_used(&hint->item);
-		if (used + hint->pinned <
+		if (used + hint->pinned + hint->reserved <
 		    div_factor(hint->key.offset, factor)) {
 			spin_unlock(&hint->lock);
 			return hint;
@@ -438,8 +437,8 @@
 		used = btrfs_block_group_used(&cache->item);
 
 		if (!cache->ro && block_group_bits(cache, data)) {
-			free_check = div_factor(cache->key.offset, factor);
-			if (used + cache->pinned < free_check) {
+			if (used + cache->pinned + cache->reserved <
+			    div_factor(cache->key.offset, factor)) {
 				found_group = cache;
 				spin_unlock(&cache->lock);
 				goto found;
@@ -1325,24 +1324,25 @@
 		if (ret)
 			break;
 
-		last = end + 1;
 		ret = get_state_private(block_group_cache, start, &ptr);
 		if (ret)
 			break;
 		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
-		err = write_one_cache_group(trans, root,
-					    path, cache);
+
+		clear_extent_bits(block_group_cache, start, end,
+				  BLOCK_GROUP_DIRTY, GFP_NOFS);
+
+		err = write_one_cache_group(trans, root, path, cache);
 		/*
 		 * if we fail to write the cache group, we want
 		 * to keep it marked dirty in hopes that a later
 		 * write will work
 		 */
 		if (err) {
+			last = end + 1;
 			werr = err;
 			continue;
 		}
-		clear_extent_bits(block_group_cache, start, end,
-				  BLOCK_GROUP_DIRTY, GFP_NOFS);
 	}
 	btrfs_free_path(path);
 	mutex_unlock(&root->fs_info->alloc_mutex);
@@ -1387,6 +1387,7 @@
 	found->total_bytes = total_bytes;
 	found->bytes_used = bytes_used;
 	found->bytes_pinned = 0;
+	found->bytes_reserved = 0;
 	found->full = 0;
 	found->force_alloc = 0;
 	*space_info = found;
@@ -1784,30 +1785,51 @@
 	}
 	while (num > 0) {
 		cache = btrfs_lookup_block_group(fs_info, bytenr);
-		if (!cache) {
-			u64 first = first_logical_byte(root, bytenr);
-			WARN_ON(first < bytenr);
-			len = min(first - bytenr, num);
-		} else {
-			len = min(num, cache->key.offset -
-				  (bytenr - cache->key.objectid));
-		}
+		BUG_ON(!cache);
+		len = min(num, cache->key.offset -
+			  (bytenr - cache->key.objectid));
 		if (pin) {
-			if (cache) {
-				spin_lock(&cache->lock);
-				cache->pinned += len;
-				cache->space_info->bytes_pinned += len;
-				spin_unlock(&cache->lock);
-			}
+			spin_lock(&cache->lock);
+			cache->pinned += len;
+			cache->space_info->bytes_pinned += len;
+			spin_unlock(&cache->lock);
 			fs_info->total_pinned += len;
 		} else {
-			if (cache) {
-				spin_lock(&cache->lock);
-				cache->pinned -= len;
-				cache->space_info->bytes_pinned -= len;
-				spin_unlock(&cache->lock);
-			}
+			spin_lock(&cache->lock);
+			cache->pinned -= len;
+			cache->space_info->bytes_pinned -= len;
+			spin_unlock(&cache->lock);
 			fs_info->total_pinned -= len;
+		}
+		bytenr += len;
+		num -= len;
+	}
+	return 0;
+}
+
+static int update_reserved_extents(struct btrfs_root *root,
+				   u64 bytenr, u64 num, int reserve)
+{
+	u64 len;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	while (num > 0) {
+		cache = btrfs_lookup_block_group(fs_info, bytenr);
+		BUG_ON(!cache);
+		len = min(num, cache->key.offset -
+			  (bytenr - cache->key.objectid));
+		if (reserve) {
+			spin_lock(&cache->lock);
+			cache->reserved += len;
+			cache->space_info->bytes_reserved += len;
+			spin_unlock(&cache->lock);
+		} else {
+			spin_lock(&cache->lock);
+			cache->reserved -= len;
+			cache->space_info->bytes_reserved -= len;
+			spin_unlock(&cache->lock);
 		}
 		bytenr += len;
 		num -= len;
@@ -2518,6 +2540,7 @@
 	maybe_lock_mutex(root);
 	set_extent_dirty(&root->fs_info->free_space_cache,
 			 start, start + len - 1, GFP_NOFS);
+	update_reserved_extents(root, start, len, 0);
 	maybe_unlock_mutex(root);
 	return 0;
 }
@@ -2534,6 +2557,7 @@
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
 				     empty_size, hint_byte, search_end, ins,
 				     data);
+	update_reserved_extents(root, ins->objectid, ins->offset, 1);
 	maybe_unlock_mutex(root);
 	return ret;
 }
@@ -2642,6 +2666,7 @@
 	ret = __btrfs_alloc_reserved_extent(trans, root, parent,
 					    root_objectid, ref_generation,
 					    owner, owner_offset, ins);
+	update_reserved_extents(root, ins->objectid, ins->offset, 0);
 	maybe_unlock_mutex(root);
 	return ret;
 }
@@ -4260,6 +4285,7 @@
 
 	spin_lock(&block_group->lock);
 	WARN_ON(block_group->pinned > 0);
+	WARN_ON(block_group->reserved > 0);
 	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
 	spin_unlock(&block_group->lock);
 	ret = 0;
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html