Btrfs only allocates chunks as we need them, however we do not delete chunks as we stop using them. This patch adds this capability. Whenever we clear the last bit of used space in a block group we try and mark it read only, and then when the last pinned space is finally removed we queue up the deletion work. I''ve tested this with xfstests and my enospc tests. When filling up the disk I see that we''ve allocated the entire disk of chunks, and then when I do rm * there is a bunch of space freed up. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> --- fs/btrfs/ctree.h | 3 + fs/btrfs/extent-tree.c | 148 ++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/volumes.c | 52 +++++++++++------ fs/btrfs/volumes.h | 4 + 4 files changed, 174 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8db9234..50ec64b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -839,6 +839,9 @@ struct btrfs_block_group_cache { * Today it will only have one thing on it, but that may change */ struct list_head cluster_list; + + /* Worker for deleting the block group if its empty */ + struct btrfs_work work; }; struct reloc_control; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 43aa62a..87aae66 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -64,6 +64,11 @@ static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); +static int btrfs_set_block_group_ro_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_block_group_cache + *cache); +static int set_block_group_ro_lock(struct btrfs_block_group_cache *cache); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -4052,6 +4057,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 old_val; u64 byte_in_group; int factor; + int empty = 0; /* block accounting for super 
block */ spin_lock(&info->delalloc_lock); @@ -4064,6 +4070,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&info->delalloc_lock); while (total) { + empty = 0; cache = btrfs_lookup_block_group(info, bytenr); if (!cache) return -1; @@ -4096,6 +4103,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { + /* + * We raced with setting the block group read only, we + * need to change it back to rw + */ + if (cache->ro) + empty = -1; old_val += num_bytes; btrfs_set_block_group_used(&cache->item, old_val); cache->reserved -= num_bytes; @@ -4106,6 +4119,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cache->space_info->lock); } else { old_val -= num_bytes; + if (old_val == 0) + empty = 1; btrfs_set_block_group_used(&cache->item, old_val); cache->pinned += num_bytes; cache->space_info->bytes_pinned += num_bytes; @@ -4118,6 +4133,29 @@ static int update_block_group(struct btrfs_trans_handle *trans, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); } + /* + * So we need to deal with 2 cases here + * + * 1) empty == 1, which means the block group is empty and + * needs to be marked ro so we can remove it later + * + * -or- + * + * 2) empty == -1, which means the block group was previously + * empty and marked read only, but not before somebody tried to + * make an allocation, so go ahead and mark it rw. 
+ */ + switch (empty) { + case -1: + btrfs_set_block_group_rw(root, cache); + break; + case 1: + btrfs_set_block_group_ro_trans(trans, root, cache); + break; + default: + break; + } + btrfs_put_block_group(cache); total -= num_bytes; bytenr += num_bytes; @@ -4288,6 +4326,17 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) cache->reserved_pinned -= len; cache->space_info->bytes_reserved += len; } + + if (btrfs_block_group_used(&cache->item) == 0 && + cache->pinned == 0) { + int ret = 0; + + if (!cache->ro) + ret = set_block_group_ro_lock(cache); + if (!ret) + btrfs_queue_worker(&fs_info->generic_worker, + &cache->work); + } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } @@ -7905,7 +7954,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } -static int set_block_group_ro(struct btrfs_block_group_cache *cache) +static int set_block_group_ro_lock(struct btrfs_block_group_cache *cache) { struct btrfs_space_info *sinfo = cache->space_info; u64 num_bytes; @@ -7914,8 +7963,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) if (cache->ro) return 0; - spin_lock(&sinfo->lock); - spin_lock(&cache->lock); num_bytes = cache->key.offset - cache->reserved - cache->pinned - cache->bytes_super - btrfs_block_group_used(&cache->item); @@ -7928,37 +7975,67 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) cache->ro = 1; ret = 0; } + + return ret; +} + +static int set_block_group_ro(struct btrfs_block_group_cache *cache) +{ + struct btrfs_space_info *sinfo = cache->space_info; + int ret; + + spin_lock(&sinfo->lock); + spin_lock(&cache->lock); + ret = set_block_group_ro_lock(cache); spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); + return ret; } -int btrfs_set_block_group_ro(struct btrfs_root *root, - struct btrfs_block_group_cache *cache) - +static int btrfs_set_block_group_ro_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + 
struct btrfs_block_group_cache + *cache) { - struct btrfs_trans_handle *trans; u64 alloc_flags; int ret; + bool alloc = true; - BUG_ON(cache->ro); + /* + * If we''re trying to set the block group as read only in a transaction + * commit then avoid doing the chunk alloc to make lockdep happy. + */ + if (trans->transaction->in_commit) + alloc = false; - trans = btrfs_join_transaction(root, 1); - BUG_ON(IS_ERR(trans)); + if (cache->ro) + return 0; alloc_flags = update_block_group_flags(root, cache->flags); - if (alloc_flags != cache->flags) + if (alloc && alloc_flags != cache->flags) do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); ret = set_block_group_ro(cache); - if (!ret) - goto out; + if (!ret || !alloc) + return ret; alloc_flags = get_alloc_profile(root, cache->space_info->flags); ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); if (ret < 0) - goto out; - ret = set_block_group_ro(cache); -out: + return ret; + return set_block_group_ro(cache); +} + +int btrfs_set_block_group_ro(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + struct btrfs_trans_handle *trans; + int ret; + + trans = btrfs_join_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); + ret = btrfs_set_block_group_ro_trans(trans, root, cache); btrfs_end_transaction(trans, root); return ret; } @@ -8206,6 +8283,43 @@ static void __link_block_group(struct btrfs_space_info *space_info, up_write(&space_info->groups_sem); } +static void block_group_delete_fn(struct btrfs_work *work) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info; + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + u64 chunk_tree; + u64 chunk_objectid; + int ret; + + /* + * If anything fails in here, just mark the block group as rw and + * return. 
+ */ + cache = container_of(work, struct btrfs_block_group_cache, work); + info = cache->fs_info; + root = info->extent_root; + chunk_tree = info->chunk_root->root_key.objectid; + chunk_objectid = btrfs_block_group_chunk_objectid(&cache->item); + + if (!cache->ro) { + WARN_ON_ONCE(1); + return; + } + + trans = btrfs_start_transaction(info->extent_root, 0); + if (IS_ERR(trans)) { + btrfs_set_block_group_rw(root, cache); + return; + } + ret = btrfs_remove_chunk(trans, root, chunk_tree, chunk_objectid, + cache->key.objectid); + if (ret) + btrfs_set_block_group_rw(root, cache); + btrfs_end_transaction(trans, root); +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -8257,6 +8371,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->fs_info = info; INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); + cache->work.func = block_group_delete_fn; if (need_clear) cache->disk_cache_state = BTRFS_DC_CLEAR; @@ -8379,6 +8494,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, spin_lock_init(&cache->tree_lock); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); + cache->work.func = block_group_delete_fn; btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc04dc1..49c055b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1726,13 +1726,13 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 return ret; } -static int btrfs_relocate_chunk(struct btrfs_root *root, - u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset) +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) { struct extent_map_tree *em_tree; struct btrfs_root *extent_root; - struct btrfs_trans_handle *trans; struct extent_map *em; struct map_lookup *map; int ret; @@ 
-1742,18 +1742,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; - ret = btrfs_can_relocate(extent_root, chunk_offset); - if (ret) - return -ENOSPC; - - /* step one, relocate all the extents inside this chunk */ - ret = btrfs_relocate_block_group(extent_root, chunk_offset); - if (ret) - return ret; - - trans = btrfs_start_transaction(root, 0); - BUG_ON(!trans); - lock_chunks(root); /* @@ -1804,10 +1792,40 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, free_extent_map(em); unlock_chunks(root); - btrfs_end_transaction(trans, root); return 0; } +static int btrfs_relocate_chunk(struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) +{ + struct btrfs_root *extent_root; + struct btrfs_trans_handle *trans; + int ret; + + root = root->fs_info->chunk_root; + extent_root = root->fs_info->extent_root; + + ret = btrfs_can_relocate(extent_root, chunk_offset); + if (ret) + return -ENOSPC; + + /* step one, relocate all the extents inside this chunk */ + ret = btrfs_relocate_block_group(extent_root, chunk_offset); + if (ret) + return ret; + + trans = btrfs_start_transaction(root, 0); + BUG_ON(!trans); + + ret = btrfs_remove_chunk(trans, root, chunk_tree, chunk_objectid, + chunk_offset); + + btrfs_end_transaction(trans, root); + + return ret; +} + static int btrfs_relocate_sys_chunks(struct btrfs_root *root) { struct btrfs_root *chunk_root = root->fs_info->chunk_root; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2b638b6..4917cc0 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -183,4 +183,8 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + 
u64 chunk_offset); #endif -- 1.6.6.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josh Berry
2010-Nov-30 17:37 UTC
Re: [PATCH] Btrfs: dynamically remove unused block groups
On Tue, Nov 30, 2010 at 08:46, Josef Bacik <josef@redhat.com> wrote:> Btrfs only allocates chunks as we need them, however we do not delete chunks as > we stop using them. This patch adds this capability. Whenever we clear the > last bit of used space in a block group we try and mark it read only, and then > when the last pinned space is finally removed we queue up the deletion work. > I've tested this with xfstests and my enospc tests. When filling up the disk > I see that we've allocated the entire disk of chunks, and then when I do rm * > there is a bunch of space freed up. Thanks,Stupid user question: I have a btrfs filesystem on a 2.6.36 kernel that used to have ~800GB of data on it. Then I deleted ~500GB of it (moved it elsewhere), but my space usage as reported by df and the btrfs tool didn't decrease appreciably. Might this be why? Thanks, Josh> Signed-off-by: Josef Bacik <josef@redhat.com> > --- > fs/btrfs/ctree.h | 3 + > fs/btrfs/extent-tree.c | 148 ++++++++++++++++++++++++++++++++++++++++++----- > fs/btrfs/volumes.c | 52 +++++++++++------ > fs/btrfs/volumes.h | 4 + > 4 files changed, 174 insertions(+), 33 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 8db9234..50ec64b 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -839,6 +839,9 @@ struct btrfs_block_group_cache { > * Today it will only have one thing on it, but that may change > */ > struct list_head cluster_list; > + > + /* Worker for deleting the block group if its empty */ > + struct btrfs_work work; > }; > > struct reloc_control; > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 43aa62a..87aae66 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -64,6 +64,11 @@ static int find_next_key(struct btrfs_path *path, int level, > struct btrfs_key *key); > static void dump_space_info(struct btrfs_space_info *info, u64 bytes, > int dump_block_groups); > +static int btrfs_set_block_group_ro_trans(struct btrfs_trans_handle 
*trans, > + struct btrfs_root *root, > + struct btrfs_block_group_cache > + *cache); > +static int set_block_group_ro_lock(struct btrfs_block_group_cache *cache); > > static noinline int > block_group_cache_done(struct btrfs_block_group_cache *cache) > @@ -4052,6 +4057,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, > u64 old_val; > u64 byte_in_group; > int factor; > + int empty = 0; > > /* block accounting for super block */ > spin_lock(&info->delalloc_lock); > @@ -4064,6 +4070,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, > spin_unlock(&info->delalloc_lock); > > while (total) { > + empty = 0; > cache = btrfs_lookup_block_group(info, bytenr); > if (!cache) > return -1; > @@ -4096,6 +4103,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, > old_val = btrfs_block_group_used(&cache->item); > num_bytes = min(total, cache->key.offset - byte_in_group); > if (alloc) { > + /* > + * We raced with setting the block group read only, we > + * need to change it back to rw > + */ > + if (cache->ro) > + empty = -1; > old_val += num_bytes; > btrfs_set_block_group_used(&cache->item, old_val); > cache->reserved -= num_bytes; > @@ -4106,6 +4119,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, > spin_unlock(&cache->space_info->lock); > } else { > old_val -= num_bytes; > + if (old_val == 0) > + empty = 1; > btrfs_set_block_group_used(&cache->item, old_val); > cache->pinned += num_bytes; > cache->space_info->bytes_pinned += num_bytes; > @@ -4118,6 +4133,29 @@ static int update_block_group(struct btrfs_trans_handle *trans, > bytenr, bytenr + num_bytes - 1, > GFP_NOFS | __GFP_NOFAIL); > } > + /* > + * So we need to deal with 2 cases here > + * > + * 1) empty == 1, which means the block group is empty and > + * needs to be marked ro so we can remove it later > + * > + * -or- > + * > + * 2) empty == -1, which means the block group was previously > + * empty and marked read only, but not before somebody tried 
to > + * make an allocation, so go ahead and mark it rw. > + */ > + switch (empty) { > + case -1: > + btrfs_set_block_group_rw(root, cache); > + break; > + case 1: > + btrfs_set_block_group_ro_trans(trans, root, cache); > + break; > + default: > + break; > + } > + > btrfs_put_block_group(cache); > total -= num_bytes; > bytenr += num_bytes; > @@ -4288,6 +4326,17 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) > cache->reserved_pinned -= len; > cache->space_info->bytes_reserved += len; > } > + > + if (btrfs_block_group_used(&cache->item) == 0 && > + cache->pinned == 0) { > + int ret = 0; > + > + if (!cache->ro) > + ret = set_block_group_ro_lock(cache); > + if (!ret) > + btrfs_queue_worker(&fs_info->generic_worker, > + &cache->work); > + } > spin_unlock(&cache->lock); > spin_unlock(&cache->space_info->lock); > } > @@ -7905,7 +7954,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) > return flags; > } > > -static int set_block_group_ro(struct btrfs_block_group_cache *cache) > +static int set_block_group_ro_lock(struct btrfs_block_group_cache *cache) > { > struct btrfs_space_info *sinfo = cache->space_info; > u64 num_bytes; > @@ -7914,8 +7963,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) > if (cache->ro) > return 0; > > - spin_lock(&sinfo->lock); > - spin_lock(&cache->lock); > num_bytes = cache->key.offset - cache->reserved - cache->pinned - > cache->bytes_super - btrfs_block_group_used(&cache->item); > > @@ -7928,37 +7975,67 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) > cache->ro = 1; > ret = 0; > } > + > + return ret; > +} > + > +static int set_block_group_ro(struct btrfs_block_group_cache *cache) > +{ > + struct btrfs_space_info *sinfo = cache->space_info; > + int ret; > + > + spin_lock(&sinfo->lock); > + spin_lock(&cache->lock); > + ret = set_block_group_ro_lock(cache); > spin_unlock(&cache->lock); > spin_unlock(&sinfo->lock); > + > return ret; > } > > 
-int btrfs_set_block_group_ro(struct btrfs_root *root, > - struct btrfs_block_group_cache *cache) > - > +static int btrfs_set_block_group_ro_trans(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + struct btrfs_block_group_cache > + *cache) > { > - struct btrfs_trans_handle *trans; > u64 alloc_flags; > int ret; > + bool alloc = true; > > - BUG_ON(cache->ro); > + /* > + * If we're trying to set the block group as read only in a transaction > + * commit then avoid doing the chunk alloc to make lockdep happy. > + */ > + if (trans->transaction->in_commit) > + alloc = false; > > - trans = btrfs_join_transaction(root, 1); > - BUG_ON(IS_ERR(trans)); > + if (cache->ro) > + return 0; > > alloc_flags = update_block_group_flags(root, cache->flags); > - if (alloc_flags != cache->flags) > + if (alloc && alloc_flags != cache->flags) > do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); > > ret = set_block_group_ro(cache); > - if (!ret) > - goto out; > + if (!ret || !alloc) > + return ret; > alloc_flags = get_alloc_profile(root, cache->space_info->flags); > ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); > if (ret < 0) > - goto out; > - ret = set_block_group_ro(cache); > -out: > + return ret; > + return set_block_group_ro(cache); > +} > + > +int btrfs_set_block_group_ro(struct btrfs_root *root, > + struct btrfs_block_group_cache *cache) > +{ > + struct btrfs_trans_handle *trans; > + int ret; > + > + trans = btrfs_join_transaction(root, 0); > + if (IS_ERR(trans)) > + return PTR_ERR(trans); > + ret = btrfs_set_block_group_ro_trans(trans, root, cache); > btrfs_end_transaction(trans, root); > return ret; > } > @@ -8206,6 +8283,43 @@ static void __link_block_group(struct btrfs_space_info *space_info, > up_write(&space_info->groups_sem); > } > > +static void block_group_delete_fn(struct btrfs_work *work) > +{ > + struct btrfs_block_group_cache *cache; > + struct btrfs_fs_info *info; > + struct btrfs_trans_handle *trans; > + struct 
btrfs_root *root; > + u64 chunk_tree; > + u64 chunk_objectid; > + int ret; > + > + /* > + * If anything fails in here, just mark the block group as rw and > + * return. > + */ > + cache = container_of(work, struct btrfs_block_group_cache, work); > + info = cache->fs_info; > + root = info->extent_root; > + chunk_tree = info->chunk_root->root_key.objectid; > + chunk_objectid = btrfs_block_group_chunk_objectid(&cache->item); > + > + if (!cache->ro) { > + WARN_ON_ONCE(1); > + return; > + } > + > + trans = btrfs_start_transaction(info->extent_root, 0); > + if (IS_ERR(trans)) { > + btrfs_set_block_group_rw(root, cache); > + return; > + } > + ret = btrfs_remove_chunk(trans, root, chunk_tree, chunk_objectid, > + cache->key.objectid); > + if (ret) > + btrfs_set_block_group_rw(root, cache); > + btrfs_end_transaction(trans, root); > +} > + > int btrfs_read_block_groups(struct btrfs_root *root) > { > struct btrfs_path *path; > @@ -8257,6 +8371,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) > cache->fs_info = info; > INIT_LIST_HEAD(&cache->list); > INIT_LIST_HEAD(&cache->cluster_list); > + cache->work.func = block_group_delete_fn; > > if (need_clear) > cache->disk_cache_state = BTRFS_DC_CLEAR; > @@ -8379,6 +8494,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, > spin_lock_init(&cache->tree_lock); > INIT_LIST_HEAD(&cache->list); > INIT_LIST_HEAD(&cache->cluster_list); > + cache->work.func = block_group_delete_fn; > > btrfs_set_block_group_used(&cache->item, bytes_used); > btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index cc04dc1..49c055b 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -1726,13 +1726,13 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 > return ret; > } > > -static int btrfs_relocate_chunk(struct btrfs_root *root, > - u64 chunk_tree, u64 chunk_objectid, > - u64 chunk_offset) > +int 
btrfs_remove_chunk(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + u64 chunk_tree, u64 chunk_objectid, > + u64 chunk_offset) > { > struct extent_map_tree *em_tree; > struct btrfs_root *extent_root; > - struct btrfs_trans_handle *trans; > struct extent_map *em; > struct map_lookup *map; > int ret; > @@ -1742,18 +1742,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, > extent_root = root->fs_info->extent_root; > em_tree = &root->fs_info->mapping_tree.map_tree; > > - ret = btrfs_can_relocate(extent_root, chunk_offset); > - if (ret) > - return -ENOSPC; > - > - /* step one, relocate all the extents inside this chunk */ > - ret = btrfs_relocate_block_group(extent_root, chunk_offset); > - if (ret) > - return ret; > - > - trans = btrfs_start_transaction(root, 0); > - BUG_ON(!trans); > - > lock_chunks(root); > > /* > @@ -1804,10 +1792,40 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, > free_extent_map(em); > > unlock_chunks(root); > - btrfs_end_transaction(trans, root); > return 0; > } > > +static int btrfs_relocate_chunk(struct btrfs_root *root, > + u64 chunk_tree, u64 chunk_objectid, > + u64 chunk_offset) > +{ > + struct btrfs_root *extent_root; > + struct btrfs_trans_handle *trans; > + int ret; > + > + root = root->fs_info->chunk_root; > + extent_root = root->fs_info->extent_root; > + > + ret = btrfs_can_relocate(extent_root, chunk_offset); > + if (ret) > + return -ENOSPC; > + > + /* step one, relocate all the extents inside this chunk */ > + ret = btrfs_relocate_block_group(extent_root, chunk_offset); > + if (ret) > + return ret; > + > + trans = btrfs_start_transaction(root, 0); > + BUG_ON(!trans); > + > + ret = btrfs_remove_chunk(trans, root, chunk_tree, chunk_objectid, > + chunk_offset); > + > + btrfs_end_transaction(trans, root); > + > + return ret; > +} > + > static int btrfs_relocate_sys_chunks(struct btrfs_root *root) > { > struct btrfs_root *chunk_root = root->fs_info->chunk_root; > diff --git a/fs/btrfs/volumes.h 
b/fs/btrfs/volumes.h > index 2b638b6..4917cc0 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -183,4 +183,8 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); > int find_free_dev_extent(struct btrfs_trans_handle *trans, > struct btrfs_device *device, u64 num_bytes, > u64 *start, u64 *max_avail); > +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + u64 chunk_tree, u64 chunk_objectid, > + u64 chunk_offset); > #endif > -- > 1.6.6.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >
Josef Bacik
2010-Nov-30 19:01 UTC
Re: [PATCH] Btrfs: dynamically remove unused block groups
On Tue, Nov 30, 2010 at 09:37:17AM -0800, Josh Berry wrote: > On Tue, Nov 30, 2010 at 08:46, Josef Bacik <josef@redhat.com> wrote: > > Btrfs only allocates chunks as we need them, however we do not delete chunks as > > we stop using them. This patch adds this capability. Whenever we clear the > > last bit of used space in a block group we try and mark it read only, and then > > when the last pinned space is finally removed we queue up the deletion work. > > I've tested this with xfstests and my enospc tests. When filling up the disk > > I see that we've allocated the entire disk of chunks, and then when I do rm * > > there is a bunch of space freed up. Thanks, > > Stupid user question: > > I have a btrfs filesystem on a 2.6.36 kernel that used to have ~800GB > of data on it. Then I deleted ~500GB of it (moved it elsewhere), but > my space usage as reported by df and the btrfs tool didn't decrease > appreciably. Might this be why? > So without this patch, with a full fs I do this [root@test1244 ~]# ./btrfs-progs-unstable/btrfs fi df /mnt/btrfs-test/ Data: total=980.25MB, used=909.91MB System, DUP: total=16.00MB, used=4.00KB System: total=4.00MB, used=0.00 Metadata, DUP: total=511.88MB, used=190.42MB Metadata: total=8.00MB, used=0.00 If I removed everything from the fs, you'd still see Data total=980.25MB, but used should be close to 0 (this is assuming no snapshots and such). With this patch if I rm -rf /mnt/btrfs/* I get this [root@test1244 ~]# ./btrfs-progs-unstable/btrfs fi df /mnt/btrfs-test/ Data: total=204.75MB, used=192.00KB System, DUP: total=16.00MB, used=4.00KB System: total=4.00MB, used=0.00 Metadata, DUP: total=307.12MB, used=24.00KB Metadata: total=8.00MB, used=0.00 So that freed up ~700MB in data space and ~200MB in metadata space that can be allocated to either data/metadata based on your usage patterns. I hope that helps explain it. 
Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josh Berry
2010-Nov-30 19:31 UTC
Re: [PATCH] Btrfs: dynamically remove unused block groups
On Tue, Nov 30, 2010 at 11:01, Josef Bacik <josef@redhat.com> wrote: > On Tue, Nov 30, 2010 at 09:37:17AM -0800, Josh Berry wrote: >> On Tue, Nov 30, 2010 at 08:46, Josef Bacik <josef@redhat.com> wrote: >> > Btrfs only allocates chunks as we need them, however we do not delete chunks as >> > we stop using them. This patch adds this capability. Whenever we clear the >> > last bit of used space in a block group we try and mark it read only, and then >> > when the last pinned space is finally removed we queue up the deletion work. >> > I've tested this with xfstests and my enospc tests. When filling up the disk >> > I see that we've allocated the entire disk of chunks, and then when I do rm * >> > there is a bunch of space freed up. Thanks, >> >> Stupid user question: >> >> I have a btrfs filesystem on a 2.6.36 kernel that used to have ~800GB >> of data on it. Then I deleted ~500GB of it (moved it elsewhere), but >> my space usage as reported by df and the btrfs tool didn't decrease >> appreciably. Might this be why? >> > > So without this patch, with a full fs I do this > > [root@test1244 ~]# ./btrfs-progs-unstable/btrfs fi df /mnt/btrfs-test/ > Data: total=980.25MB, used=909.91MB > System, DUP: total=16.00MB, used=4.00KB > System: total=4.00MB, used=0.00 > Metadata, DUP: total=511.88MB, used=190.42MB > Metadata: total=8.00MB, used=0.00 > > If I removed everything from the fs, you'd still see Data total=980.25MB, but > used should be close to 0 (this is assuming no snapshots and such). With this > patch if I rm -rf /mnt/btrfs/* I get this > > [root@test1244 ~]# ./btrfs-progs-unstable/btrfs fi df /mnt/btrfs-test/ > Data: total=204.75MB, used=192.00KB > System, DUP: total=16.00MB, used=4.00KB > System: total=4.00MB, used=0.00 > Metadata, DUP: total=307.12MB, used=24.00KB > Metadata: total=8.00MB, used=0.00 > > So that free'd up ~700mb in data space and ~200mb in metadata space that can be > allocated to either data/metadata based on your usage patterns. 
I hope that > helps explain it. That makes sense, thanks. I was still seeing very high data-used, but it turns out there was an old snapshot I didn't know I had, so the problem was really stupid-user error. :) -- Josh -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2010-Nov-30 19:35 UTC
Re: [PATCH] Btrfs: dynamically remove unused block groups
On Tue, Nov 30, 2010 at 11:31:17AM -0800, Josh Berry wrote: > On Tue, Nov 30, 2010 at 11:01, Josef Bacik <josef@redhat.com> wrote: > > On Tue, Nov 30, 2010 at 09:37:17AM -0800, Josh Berry wrote: > >> On Tue, Nov 30, 2010 at 08:46, Josef Bacik <josef@redhat.com> wrote: > >> > Btrfs only allocates chunks as we need them, however we do not delete chunks as > >> > we stop using them. This patch adds this capability. Whenever we clear the > >> > last bit of used space in a block group we try and mark it read only, and then > >> > when the last pinned space is finally removed we queue up the deletion work. > >> > I've tested this with xfstests and my enospc tests. When filling up the disk > >> > I see that we've allocated the entire disk of chunks, and then when I do rm * > >> > there is a bunch of space freed up. Thanks, > >> > >> Stupid user question: > >> > >> I have a btrfs filesystem on a 2.6.36 kernel that used to have ~800GB > >> of data on it. Then I deleted ~500GB of it (moved it elsewhere), but > >> my space usage as reported by df and the btrfs tool didn't decrease > >> appreciably. Might this be why? > >> > > > > So without this patch, with a full fs I do this > > > > [root@test1244 ~]# ./btrfs-progs-unstable/btrfs fi df /mnt/btrfs-test/ > > Data: total=980.25MB, used=909.91MB > > System, DUP: total=16.00MB, used=4.00KB > > System: total=4.00MB, used=0.00 > > Metadata, DUP: total=511.88MB, used=190.42MB > > Metadata: total=8.00MB, used=0.00 > > > > If I removed everything from the fs, you'd still see Data total=980.25MB, but > > used should be close to 0 (this is assuming no snapshots and such). 
With this > > patch if I rm -rf /mnt/btrfs/* I get this > > > > [root@test1244 ~]# ./btrfs-progs-unstable/btrfs fi df /mnt/btrfs-test/ > > Data: total=204.75MB, used=192.00KB > > System, DUP: total=16.00MB, used=4.00KB > > System: total=4.00MB, used=0.00 > > Metadata, DUP: total=307.12MB, used=24.00KB > > Metadata: total=8.00MB, used=0.00 > > > > So that free'd up ~700mb in data space and ~200mb in metadata space that can be > > allocated to either data/metadata based on your usage patterns. I hope that > > helps explain it. > > That makes sense, thanks. > > I was still seeing very high data-used, but it turns out there was an > old snapshot I didn't know I had, so the problem was really > stupid-user error. :) > Yeah, two things to keep in mind. First, if you have snapshots you are likely not to see data freed up the way you expect. The other thing is that because everything is COW we can't allocate the newly freed space until the transaction commits, so you may have to wait ~30 seconds or run sync to force the transaction to commit before you see your space actually freed up. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Anthony Roberts
2010-Dec-01 04:53 UTC
Re: [PATCH] Btrfs: dynamically remove unused block groups
Hello, What happens in the event the filesystem has mostly been cleared out, but there's a few things left? For example, several of the chunks might be at very low usage, but not zero. Would the user be able to defragment the filesystem to cause these chunks to be consolidated? Regards, -Anthony > Yeah 2 things to keep in mind, 1 if you have snapshots you are likely > to not see > data free'd up the way you expect it. The other thing is that > because > everything is COW we can't allocate the newly free'd space until the > transaction > commits, so you may have to wait ~30 seconds or run sync to force the > transaction to commit before you see your space actually free'd up. > Thanks, > > Josef > -- > To unsubscribe from this list: send the line "unsubscribe > linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2010-Dec-01 08:11 UTC
Re: [PATCH] Btrfs: dynamically remove unused block groups
On Tue, Nov 30, 2010 at 09:53:41PM -0700, Anthony Roberts wrote: > Hello, > > What happens in the event the filesystem has mostly been cleared out, > but there's a few things left? For example, several of the chunks might > be at very low usage, but not zero. Would the user be able to defragment > the filesystem to cause these chunks to be consolidated? > Yeah, that's what balance is for. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html