Hello,
This patch adds reserved extents accounting. This helps the allocator
choose the block group from which free extents are allocated.
Regards
Yan Zheng
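
For context, the selection logic in __btrfs_find_block_group now counts reserved
bytes against a block group's capacity: a group is only picked while
used + pinned + reserved stays below a fraction of its size (div_factor). The
standalone sketch below models that check in plain userspace C; the struct and
helper names are simplified stand-ins for illustration, not the kernel's own
definitions.

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for struct btrfs_block_group_cache (illustrative only). */
struct bg_sample {
	uint64_t size;      /* key.offset: total bytes in the block group    */
	uint64_t used;      /* bytes recorded as used in the block group item */
	uint64_t pinned;    /* bytes pinned until the transaction commits     */
	uint64_t reserved;  /* bytes handed out by the allocator but not yet
			       recorded in the extent tree (the new field)   */
};

/* Mirror of div_factor(): returns num * factor / 10. */
static uint64_t div_factor(uint64_t num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	num /= 10;
	return num;
}

/*
 * Post-patch style of check: a block group is a candidate only while
 * used + pinned + reserved is below a fraction of its total size, so
 * space that is merely reserved no longer looks free to the allocator.
 */
static int group_has_room(const struct bg_sample *bg, int factor)
{
	return bg->used + bg->pinned + bg->reserved <
	       div_factor(bg->size, factor);
}

int main(void)
{
	struct bg_sample bg = {
		.size     = 1024ULL * 1024 * 1024, /* 1 GiB block group */
		.used     =  700ULL * 1024 * 1024,
		.pinned   =   50ULL * 1024 * 1024,
		.reserved =  100ULL * 1024 * 1024,
	};

	/* factor = 8 (metadata): needs used+pinned+reserved < 80% of size. */
	printf("candidate: %s\n", group_has_room(&bg, 8) ? "yes" : "no");
	return 0;
}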
---
diff -r b5babeda93fa ctree.h
--- a/ctree.h Tue Sep 09 02:16:12 2008 +0800
+++ b/ctree.h Tue Sep 09 02:16:20 2008 +0800
@@ -499,6 +499,7 @@
u64 total_bytes;
u64 bytes_used;
u64 bytes_pinned;
+ u64 bytes_reserved;
int full;
int force_alloc;
struct list_head list;
@@ -510,6 +511,7 @@
struct btrfs_space_info *space_info;
spinlock_t lock;
u64 pinned;
+ u64 reserved;
u64 flags;
int cached;
int ro;
diff -r b5babeda93fa extent-tree.c
--- a/extent-tree.c Tue Sep 09 02:16:12 2008 +0800
+++ b/extent-tree.c Tue Sep 09 02:16:20 2008 +0800
@@ -374,7 +374,6 @@
u64 last = 0;
u64 start;
u64 end;
- u64 free_check;
u64 ptr;
int bit;
int ret;
@@ -385,7 +384,7 @@
block_group_cache = &info->block_group_cache;
if (data & BTRFS_BLOCK_GROUP_METADATA)
- factor = 9;
+ factor = 8;
bit = block_group_state_bits(data);
@@ -395,7 +394,7 @@
if (shint && block_group_bits(shint, data) && !shint->ro) {
spin_lock(&shint->lock);
used = btrfs_block_group_used(&shint->item);
- if (used + shint->pinned <
+ if (used + shint->pinned + shint->reserved <
div_factor(shint->key.offset, factor)) {
spin_unlock(&shint->lock);
return shint;
@@ -406,7 +405,7 @@
if (hint && !hint->ro && block_group_bits(hint, data)) {
spin_lock(&hint->lock);
used = btrfs_block_group_used(&hint->item);
- if (used + hint->pinned <
+ if (used + hint->pinned + hint->reserved <
div_factor(hint->key.offset, factor)) {
spin_unlock(&hint->lock);
return hint;
@@ -438,8 +437,8 @@
used = btrfs_block_group_used(&cache->item);
if (!cache->ro && block_group_bits(cache, data)) {
- free_check = div_factor(cache->key.offset, factor);
- if (used + cache->pinned < free_check) {
+ if (used + cache->pinned + cache->reserved <
+ div_factor(cache->key.offset, factor)) {
found_group = cache;
spin_unlock(&cache->lock);
goto found;
@@ -1325,24 +1324,25 @@
if (ret)
break;
- last = end + 1;
ret = get_state_private(block_group_cache, start, &ptr);
if (ret)
break;
cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
- err = write_one_cache_group(trans, root,
- path, cache);
+
+ clear_extent_bits(block_group_cache, start, end,
+ BLOCK_GROUP_DIRTY, GFP_NOFS);
+
+ err = write_one_cache_group(trans, root, path, cache);
/*
* if we fail to write the cache group, we want
* to keep it marked dirty in hopes that a later
* write will work
*/
if (err) {
+ last = end + 1;
werr = err;
continue;
}
- clear_extent_bits(block_group_cache, start, end,
- BLOCK_GROUP_DIRTY, GFP_NOFS);
}
btrfs_free_path(path);
mutex_unlock(&root->fs_info->alloc_mutex);
@@ -1387,6 +1387,7 @@
found->total_bytes = total_bytes;
found->bytes_used = bytes_used;
found->bytes_pinned = 0;
+ found->bytes_reserved = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -1784,30 +1785,51 @@
}
while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr);
- if (!cache) {
- u64 first = first_logical_byte(root, bytenr);
- WARN_ON(first < bytenr);
- len = min(first - bytenr, num);
- } else {
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
- }
+ BUG_ON(!cache);
+ len = min(num, cache->key.offset -
+ (bytenr - cache->key.objectid));
if (pin) {
- if (cache) {
- spin_lock(&cache->lock);
- cache->pinned += len;
- cache->space_info->bytes_pinned += len;
- spin_unlock(&cache->lock);
- }
+ spin_lock(&cache->lock);
+ cache->pinned += len;
+ cache->space_info->bytes_pinned += len;
+ spin_unlock(&cache->lock);
fs_info->total_pinned += len;
} else {
- if (cache) {
- spin_lock(&cache->lock);
- cache->pinned -= len;
- cache->space_info->bytes_pinned -= len;
- spin_unlock(&cache->lock);
- }
+ spin_lock(&cache->lock);
+ cache->pinned -= len;
+ cache->space_info->bytes_pinned -= len;
+ spin_unlock(&cache->lock);
fs_info->total_pinned -= len;
+ }
+ bytenr += len;
+ num -= len;
+ }
+ return 0;
+}
+
+static int update_reserved_extents(struct btrfs_root *root,
+ u64 bytenr, u64 num, int reserve)
+{
+ u64 len;
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
+ WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+ while (num > 0) {
+ cache = btrfs_lookup_block_group(fs_info, bytenr);
+ BUG_ON(!cache);
+ len = min(num, cache->key.offset -
+ (bytenr - cache->key.objectid));
+ if (reserve) {
+ spin_lock(&cache->lock);
+ cache->reserved += len;
+ cache->space_info->bytes_reserved += len;
+ spin_unlock(&cache->lock);
+ } else {
+ spin_lock(&cache->lock);
+ cache->reserved -= len;
+ cache->space_info->bytes_reserved -= len;
+ spin_unlock(&cache->lock);
}
bytenr += len;
num -= len;
@@ -2518,6 +2540,7 @@
maybe_lock_mutex(root);
set_extent_dirty(&root->fs_info->free_space_cache,
start, start + len - 1, GFP_NOFS);
+ update_reserved_extents(root, start, len, 0);
maybe_unlock_mutex(root);
return 0;
}
@@ -2534,6 +2557,7 @@
ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
empty_size, hint_byte, search_end, ins,
data);
+ update_reserved_extents(root, ins->objectid, ins->offset, 1);
maybe_unlock_mutex(root);
return ret;
}
@@ -2642,6 +2666,7 @@
ret = __btrfs_alloc_reserved_extent(trans, root, parent,
root_objectid, ref_generation,
owner, owner_offset, ins);
+ update_reserved_extents(root, ins->objectid, ins->offset, 0);
maybe_unlock_mutex(root);
return ret;
}
@@ -4260,6 +4285,7 @@
spin_lock(&block_group->lock);
WARN_ON(block_group->pinned > 0);
+ WARN_ON(block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
spin_unlock(&block_group->lock);
ret = 0;
--