This patchset fixes some bugs of autodefrag. We can pull this patchset from the URL git://github.com/miaoxie/linux-btrfs.git autodefrag Thanks Miao --- Miao Xie (5): Btrfs: use slabs for auto defrag allocation Btrfs: fix unprotected defragable inode insertion Btrfs: restructure btrfs_run_defrag_inodes() Btrfs: fix freeze vs auto defrag Btrfs: fix remount vs autodefrag fs/btrfs/ctree.h | 4 + fs/btrfs/disk-io.c | 2 +- fs/btrfs/file.c | 301 +++++++++++++++++++++++++++++++++++------------------ fs/btrfs/super.c | 38 ++++++- 4 files changed, 239 insertions(+), 106 deletions(-) -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
The auto defrag allocation is in the fast path of the IO, so use slabs to improve the speed of the allocation. And besides that, it can do check for leaked objects when the module is removed. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/file.c | 28 ++++++++++++++++++++++++---- fs/btrfs/super.c | 9 ++++++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2ce1135..fc00ad9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3370,6 +3370,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); /* file.c */ +int btrfs_auto_defrag_init(void); +void btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 110d3cb..924ce79 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -41,6 +41,7 @@ #include "compat.h" #include "volumes.h" +static struct kmem_cache *btrfs_inode_defrag_cachep; /* * when auto defrag is enabled we * queue up these defrag structs to remember which @@ -127,7 +128,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, return; exists: - kfree(defrag); + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); return; } @@ -157,7 +158,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, else transid = BTRFS_I(inode)->root->last_trans; - defrag = kzalloc(sizeof(*defrag), GFP_NOFS); + defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); if (!defrag) return -ENOMEM; @@ -169,7 +170,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) __btrfs_add_inode_defrag(inode, defrag); else - kfree(defrag); + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); return 0; } @@ -315,7 +316,8 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) next: spin_lock(&fs_info->defrag_inodes_lock); next_free: - kfree(defrag); + if (defrag) + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); } spin_unlock(&fs_info->defrag_inodes_lock); @@ -2293,3 +2295,21 @@ const struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_ioctl, #endif }; + +void btrfs_auto_defrag_exit(void) +{ + if (btrfs_inode_defrag_cachep) + kmem_cache_destroy(btrfs_inode_defrag_cachep); +} + +int btrfs_auto_defrag_init(void) +{ + btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", + sizeof(struct inode_defrag), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_inode_defrag_cachep) + return -ENOMEM; + + return 0; +} diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 915ac14..b3b041a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1647,10 +1647,14 @@ static int __init init_btrfs_fs(void) if (err) goto free_ordered_data; - err = btrfs_interface_init(); + err = btrfs_auto_defrag_init(); if (err) goto free_delayed_inode; + err = btrfs_interface_init(); + if (err) + goto free_auto_defrag; + err = register_filesystem(&btrfs_fs_type); if (err) goto unregister_ioctl; @@ -1662,6 +1666,8 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); +free_auto_defrag: + btrfs_auto_defrag_exit(); free_delayed_inode: btrfs_delayed_inode_exit(); free_ordered_data: @@ -1681,6 +1687,7 @@ free_compress: static void __exit exit_btrfs_fs(void) { btrfs_destroy_cachep(); + btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); ordered_data_exit(); extent_map_exit(); -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Miao Xie
2012-Nov-26 09:25 UTC
[PATCH 2/5] Btrfs: fix unprotected defragable inode insertion
We forget to get the defrag lock when we re-add the defragable inode, Fix it. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/file.c | 70 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 924ce79..b59f12ef 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -91,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, * If an existing record is found the defrag item you * pass in is freed */ -static void __btrfs_add_inode_defrag(struct inode *inode, +static int __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -119,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode, entry->transid = defrag->transid; if (defrag->last_offset > entry->last_offset) entry->last_offset = defrag->last_offset; - goto exists; + return -EEXIST; } } set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); - return; + return 0; +} -exists: - kmem_cache_free(btrfs_inode_defrag_cachep, defrag); - return; +static inline int __need_auto_defrag(struct btrfs_root *root) +{ + if (!btrfs_test_opt(root, AUTO_DEFRAG)) + return 0; + + if (btrfs_fs_closing(root->fs_info)) + return 0; + return 1; } /* @@ -143,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct btrfs_root *root = BTRFS_I(inode)->root; struct inode_defrag *defrag; u64 transid; + int ret; - if (!btrfs_test_opt(root, AUTO_DEFRAG)) - return 0; - - if (btrfs_fs_closing(root->fs_info)) + if (!__need_auto_defrag(root)) return 0; if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) @@ -167,15 +171,51 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, defrag->root = root->root_key.objectid; spin_lock(&root->fs_info->defrag_inodes_lock); - if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) - __btrfs_add_inode_defrag(inode, defrag); - else + if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) { + /* + * If we set IN_DEFRAG flag and evict the inode from memory, + * and then re-read this inode, this new inode doesn''t have + * IN_DEFRAG flag. At the case, we may find the existed defrag. + */ + ret = __btrfs_add_inode_defrag(inode, defrag); + if (ret) + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + } else { kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + } spin_unlock(&root->fs_info->defrag_inodes_lock); return 0; } /* + * Requeue the defrag object. If there is a defrag object that points to + * the same inode in the tree, we will merge them together (by + * __btrfs_add_inode_defrag()) and free the one that we want to requeue. + */ +void btrfs_requeue_inode_defrag(struct inode *inode, + struct inode_defrag *defrag) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + if (!__need_auto_defrag(root)) + goto out; + + /* + * Here we don''t check the IN_DEFRAG flag, because we need merge + * them together. + */ + spin_lock(&root->fs_info->defrag_inodes_lock); + ret = __btrfs_add_inode_defrag(inode, defrag); + spin_unlock(&root->fs_info->defrag_inodes_lock); + if (ret) + goto out; + return; +out: + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); +} + +/* * must be called with the defrag_inodes lock held */ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, @@ -294,7 +334,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) */ if (num_defrag == defrag_batch) { defrag->last_offset = range.start; - __btrfs_add_inode_defrag(inode, defrag); + btrfs_requeue_inode_defrag(inode, defrag); /* * we don''t want to kfree defrag, we added it back to * the rbtree @@ -308,7 +348,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) */ defrag->last_offset = 0; defrag->cycled = 1; - __btrfs_add_inode_defrag(inode, defrag); + btrfs_requeue_inode_defrag(inode, defrag); defrag = NULL; } -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
This patch restructure btrfs_run_defrag_inodes() and make the code of the auto defragment more readable. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 2 +- fs/btrfs/file.c | 197 +++++++++++++++++++++++++++++------------------------ 3 files changed, 109 insertions(+), 91 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fc00ad9..4ce24ce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3375,6 +3375,7 @@ void btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); +void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7cda519..3e2e6aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3279,7 +3279,7 @@ int close_ctree(struct btrfs_root *root) (atomic_read(&fs_info->defrag_running) == 0)); /* clear out the rbtree of defraggable inodes */ - btrfs_run_defrag_inodes(fs_info); + btrfs_cleanup_defrag_inodes(fs_info); if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b59f12ef..6ca2b46 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -216,11 +216,11 @@ out: } /* - * must be called with the defrag_inodes lock held + * pick the defragable inode that we want, if it doesn''t exist, we will get + * the next one. */ -struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, - u64 root, u64 ino, - struct rb_node **next) +static struct inode_defrag * +btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino) { struct inode_defrag *entry = NULL; struct inode_defrag tmp; @@ -231,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, tmp.ino = ino; tmp.root = root; - p = info->defrag_inodes.rb_node; + spin_lock(&fs_info->defrag_inodes_lock); + p = fs_info->defrag_inodes.rb_node; while (p) { parent = p; entry = rb_entry(parent, struct inode_defrag, rb_node); @@ -242,52 +243,128 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, else if (ret > 0) p = parent->rb_right; else - return entry; + goto out; } - if (next) { - while (parent && __compare_inode_defrag(&tmp, entry) > 0) { - parent = rb_next(parent); + if (parent && __compare_inode_defrag(&tmp, entry) > 0) { + parent = rb_next(parent); + if (parent) entry = rb_entry(parent, struct inode_defrag, rb_node); - } - *next = parent; + else + entry = NULL; } - return NULL; +out: + if (entry) + rb_erase(parent, &fs_info->defrag_inodes); + spin_unlock(&fs_info->defrag_inodes_lock); + return entry; } -/* - * run through the list of inodes in the FS that need - * defragging - */ -int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) +void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) { struct inode_defrag *defrag; + struct rb_node *node; + + spin_lock(&fs_info->defrag_inodes_lock); + node = rb_first(&fs_info->defrag_inodes); + while (node) { + rb_erase(node, &fs_info->defrag_inodes); + defrag = rb_entry(node, struct inode_defrag, rb_node); + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + + if (need_resched()) { + spin_unlock(&fs_info->defrag_inodes_lock); + cond_resched(); + spin_lock(&fs_info->defrag_inodes_lock); + } + + node = rb_first(&fs_info->defrag_inodes); + } + spin_unlock(&fs_info->defrag_inodes_lock); +} + +#define BTRFS_DEFRAG_BATCH 1024 + +static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, + struct inode_defrag *defrag) +{ struct btrfs_root *inode_root; struct inode *inode; - struct rb_node *n; struct btrfs_key key; struct btrfs_ioctl_defrag_range_args range; - u64 first_ino = 0; - u64 root_objectid = 0; int num_defrag; - int defrag_batch = 1024; + /* get the inode */ + key.objectid = defrag->root; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = (u64)-1; + inode_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(inode_root)) { + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + return PTR_ERR(inode_root); + } + + key.objectid = defrag->ino; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); + if (IS_ERR(inode)) { + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + return PTR_ERR(inode); + } + + /* do a chunk of defrag */ + clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); memset(&range, 0, sizeof(range)); range.len = (u64)-1; + range.start = defrag->last_offset; + num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, + BTRFS_DEFRAG_BATCH); + /* + * if we filled the whole defrag batch, there + * must be more work to do. Queue this defrag + * again + */ + if (num_defrag == BTRFS_DEFRAG_BATCH) { + defrag->last_offset = range.start; + btrfs_requeue_inode_defrag(inode, defrag); + } else if (defrag->last_offset && !defrag->cycled) { + /* + * we didn''t fill our defrag batch, but + * we didn''t start at zero. Make sure we loop + * around to the start of the file. + */ + defrag->last_offset = 0; + defrag->cycled = 1; + btrfs_requeue_inode_defrag(inode, defrag); + } else { + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + } + + iput(inode); + return 0; +} + +/* + * run through the list of inodes in the FS that need + * defragging + */ +int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) +{ + struct inode_defrag *defrag; + u64 first_ino = 0; + u64 root_objectid = 0; atomic_inc(&fs_info->defrag_running); - spin_lock(&fs_info->defrag_inodes_lock); while(1) { - n = NULL; + if (!__need_auto_defrag(fs_info->tree_root)) + break; /* find an inode to defrag */ - defrag = btrfs_find_defrag_inode(fs_info, root_objectid, - first_ino, &n); + defrag = btrfs_pick_defrag_inode(fs_info, root_objectid, + first_ino); if (!defrag) { - if (n) { - defrag = rb_entry(n, struct inode_defrag, - rb_node); - } else if (root_objectid || first_ino) { + if (root_objectid || first_ino) { root_objectid = 0; first_ino = 0; continue; @@ -296,71 +373,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) } } - /* remove it from the rbtree */ first_ino = defrag->ino + 1; root_objectid = defrag->root; - rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); - - if (btrfs_fs_closing(fs_info)) - goto next_free; - spin_unlock(&fs_info->defrag_inodes_lock); - - /* get the inode */ - key.objectid = defrag->root; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - key.offset = (u64)-1; - inode_root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(inode_root)) - goto next; - - key.objectid = defrag->ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; - - inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); - if (IS_ERR(inode)) - goto next; - - /* do a chunk of defrag */ - clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); - range.start = defrag->last_offset; - num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, - defrag_batch); - /* - * if we filled the whole defrag batch, there - * must be more work to do. Queue this defrag - * again - */ - if (num_defrag == defrag_batch) { - defrag->last_offset = range.start; - btrfs_requeue_inode_defrag(inode, defrag); - /* - * we don''t want to kfree defrag, we added it back to - * the rbtree - */ - defrag = NULL; - } else if (defrag->last_offset && !defrag->cycled) { - /* - * we didn''t fill our defrag batch, but - * we didn''t start at zero. Make sure we loop - * around to the start of the file. - */ - defrag->last_offset = 0; - defrag->cycled = 1; - btrfs_requeue_inode_defrag(inode, defrag); - defrag = NULL; - } - - iput(inode); -next: - spin_lock(&fs_info->defrag_inodes_lock); -next_free: - if (defrag) - kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + __btrfs_run_defrag_inode(fs_info, defrag); } - spin_unlock(&fs_info->defrag_inodes_lock); - atomic_dec(&fs_info->defrag_running); /* -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
If we freeze the fs, the auto defragment should not run. Fix it. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6ca2b46..40b17d0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -318,8 +318,11 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, memset(&range, 0, sizeof(range)); range.len = (u64)-1; range.start = defrag->last_offset; + + sb_start_write(fs_info->sb); num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, BTRFS_DEFRAG_BATCH); + sb_end_write(fs_info->sb); /* * if we filled the whole defrag batch, there * must be more work to do. Queue this defrag -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
If we remount the fs to close the auto defragment or make the fs R/O, we should stop the auto defragment. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ctree.h | 1 + fs/btrfs/file.c | 13 +++++++++++++ fs/btrfs/super.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4ce24ce..01d671c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1759,6 +1759,7 @@ struct btrfs_ioctl_defrag_range_args { #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) +#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ BTRFS_MOUNT_##opt) /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 40b17d0..7aaae56 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -320,8 +320,21 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, range.start = defrag->last_offset; sb_start_write(fs_info->sb); + + /* Avoid defraging files on R/O fs */ + if (!down_write_trylock(&fs_info->sb->s_umount)) { + sb_end_write(fs_info->sb); + btrfs_requeue_inode_defrag(inode, defrag); + iput(inode); + return -EBUSY; + } + + BUG_ON(fs_info->sb->s_flags & MS_RDONLY); + num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, BTRFS_DEFRAG_BATCH); + + up_write(&fs_info->sb->s_umount); sb_end_write(fs_info->sb); /* * if we filled the whole defrag batch, there diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b3b041a..2e7beee 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1189,6 +1189,32 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); } +static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info, + unsigned long old_opts, int flags) +{ + if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || + (flags & MS_RDONLY))) { + /* wait for any defraggers to finish */ + wait_event(fs_info->transaction_wait, + (atomic_read(&fs_info->defrag_running) == 0)); + } +} + +static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, + unsigned long old_opts, int flags) +{ + /* + * We remount the fs successfully, then we need cleanup all defragable + * inodes if the autodefragment is close or the fs is R/O. + */ + if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || + (flags & MS_RDONLY))) + btrfs_cleanup_defrag_inodes(fs_info); + +} + static int btrfs_remount(struct super_block *sb, int *flags, char *data) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); @@ -1214,6 +1240,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; + btrfs_remount_prepare(fs_info, old_opts, *flags); + if (*flags & MS_RDONLY) { sb->s_flags |= MS_RDONLY; @@ -1247,6 +1275,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; } + btrfs_remount_cleanup(fs_info, old_opts, *flags); return 0; restore: -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 26, 2012 at 02:28:13AM -0700, Miao Xie wrote:> If we remount the fs to close the auto defragment or make the fs R/O, we should > stop the auto defragment. > > Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>I''m dropping this patch, it causes a deadlock since defrag will need to reserve metadata which could call writeback_sb_nr_if_idle which does a down_read(&sb->s_umount). Figure out another way to fix this and I''ll apply it. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On fri, 14 Dec 2012 12:51:06 -0500, Josef Bacik wrote:> On Mon, Nov 26, 2012 at 02:28:13AM -0700, Miao Xie wrote: >> If we remount the fs to close the auto defragment or make the fs R/O, we should >> stop the auto defragment. >> >> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> > > I''m dropping this patch, it causes a deadlock since defrag will need to reserve > metadata which could call writeback_sb_nr_if_idle which does a > down_read(&sb->s_umount). Figure out another way to fix this and I''ll apply it. > Thanks,Hi, Josef I forget to point out this patch is based on my patches: [PATCH 1/2 RESEND] vfs: re-implement writeback_inodes_sb(_nr)_if_idle() and rename them [PATCH 2/2 RESEND] Btrfs: flush all the dirty pages if try_to_writeback_inodes_sb_nr() fails But I found you implemented a new writeback_sb_nr_if_idle()(Btrfs: fix autodefrag and umount lockup), with this new function, my patch(Btrfs: fix remount vs autodefrag) also can wrok well. Thanks Miao> > Josef > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Dec 17, 2012 at 12:24:53AM -0700, Miao Xie wrote:> On fri, 14 Dec 2012 12:51:06 -0500, Josef Bacik wrote: > > On Mon, Nov 26, 2012 at 02:28:13AM -0700, Miao Xie wrote: > >> If we remount the fs to close the auto defragment or make the fs R/O, we should > >> stop the auto defragment. > >> > >> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> > > > > I''m dropping this patch, it causes a deadlock since defrag will need to reserve > > metadata which could call writeback_sb_nr_if_idle which does a > > down_read(&sb->s_umount). Figure out another way to fix this and I''ll apply it. > > Thanks, > > Hi, Josef > > I forget to point out this patch is based on my patches: > [PATCH 1/2 RESEND] vfs: re-implement writeback_inodes_sb(_nr)_if_idle() and rename them > [PATCH 2/2 RESEND] Btrfs: flush all the dirty pages if try_to_writeback_inodes_sb_nr() fails > > But I found you implemented a new writeback_sb_nr_if_idle()(Btrfs: fix autodefrag and umount lockup), > with this new function, my patch(Btrfs: fix remount vs autodefrag) also can wrok well. >Yeah I''ll pull them on now. Once Al takes the vfs patch we can drop my local function, so when that happens send a patch to remove it please. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Any comments about this patch? Thanks Miao On mon, 26 Nov 2012 17:28:13 +0800, Miao Xie wrote:> If we remount the fs to close the auto defragment or make the fs R/O, we should > stop the auto defragment. > > Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> > --- > fs/btrfs/ctree.h | 1 + > fs/btrfs/file.c | 13 +++++++++++++ > fs/btrfs/super.c | 29 +++++++++++++++++++++++++++++ > 3 files changed, 43 insertions(+) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 4ce24ce..01d671c 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -1759,6 +1759,7 @@ struct btrfs_ioctl_defrag_range_args { > > #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) > #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) > +#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) > #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ > BTRFS_MOUNT_##opt) > /* > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c > index 40b17d0..7aaae56 100644 > --- a/fs/btrfs/file.c > +++ b/fs/btrfs/file.c > @@ -320,8 +320,21 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, > range.start = defrag->last_offset; > > sb_start_write(fs_info->sb); > + > + /* Avoid defraging files on R/O fs */ > + if (!down_write_trylock(&fs_info->sb->s_umount)) { > + sb_end_write(fs_info->sb); > + btrfs_requeue_inode_defrag(inode, defrag); > + iput(inode); > + return -EBUSY; > + } > + > + BUG_ON(fs_info->sb->s_flags & MS_RDONLY); > + > num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, > BTRFS_DEFRAG_BATCH); > + > + up_write(&fs_info->sb->s_umount); > sb_end_write(fs_info->sb); > /* > * if we filled the whole defrag batch, there > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index b3b041a..2e7beee 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -1189,6 +1189,32 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, > btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); > } > > +static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info, > + unsigned long old_opts, int flags) > +{ > + if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && > + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || > + (flags & MS_RDONLY))) { > + /* wait for any defraggers to finish */ > + wait_event(fs_info->transaction_wait, > + (atomic_read(&fs_info->defrag_running) == 0)); > + } > +} > + > +static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, > + unsigned long old_opts, int flags) > +{ > + /* > + * We remount the fs successfully, then we need cleanup all defragable > + * inodes if the autodefragment is close or the fs is R/O. > + */ > + if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && > + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || > + (flags & MS_RDONLY))) > + btrfs_cleanup_defrag_inodes(fs_info); > + > +} > + > static int btrfs_remount(struct super_block *sb, int *flags, char *data) > { > struct btrfs_fs_info *fs_info = btrfs_sb(sb); > @@ -1214,6 +1240,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) > if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) > return 0; > > + btrfs_remount_prepare(fs_info, old_opts, *flags); > + > if (*flags & MS_RDONLY) { > sb->s_flags |= MS_RDONLY; > > @@ -1247,6 +1275,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) > sb->s_flags &= ~MS_RDONLY; > } > > + btrfs_remount_cleanup(fs_info, old_opts, *flags); > return 0; > > restore: >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
If we remount the fs to close the auto defragment or make the fs R/O, we should stop the auto defragment. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- Changelog v1 -> v2: - don''t use ->s_umount to avoid R/W->R/O remounting during the defragment. Instead We add a new state that tell thedefragger the fs is under remount, then the defragger pauses. --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/file.c | 5 +++++ fs/btrfs/super.c | 40 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1679051..b355bb4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -339,6 +339,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) * File system states */ #define BTRFS_FS_STATE_ERROR 0 +#define BTRFS_FS_STATE_REMOUNTING 1 /* Super block flags */ /* Errors detected */ @@ -1864,6 +1865,7 @@ struct btrfs_ioctl_defrag_range_args { #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) +#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ BTRFS_MOUNT_##opt) /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b12ba52..32b5cff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -374,6 +374,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) atomic_inc(&fs_info->defrag_running); while(1) { + /* Pause the auto defragger. */ + if (test_bit(BTRFS_FS_STATE_REMOUNTING, + &fs_info->fs_state)) + break; + if (!__need_auto_defrag(fs_info->tree_root)) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index db1ba9a..68a29a1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1202,6 +1202,38 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, new_pool_size); } +static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info, + unsigned long old_opts, int flags) +{ + set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); + + if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || + (flags & MS_RDONLY))) { + /* wait for any defraggers to finish */ + wait_event(fs_info->transaction_wait, + (atomic_read(&fs_info->defrag_running) == 0)); + if (flags & MS_RDONLY) + sync_filesystem(fs_info->sb); + } +} + +static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, + unsigned long old_opts) +{ + /* + * We need cleanup all defragable inodes if the autodefragment is + * close or the fs is R/O. + */ + if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && + (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || + (fs_info->sb->s_flags & MS_RDONLY))) { + btrfs_cleanup_defrag_inodes(fs_info); + } + + clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); +} + static int btrfs_remount(struct super_block *sb, int *flags, char *data) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); @@ -1215,6 +1247,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) unsigned int old_metadata_ratio = fs_info->metadata_ratio; int ret; + btrfs_remount_prepare(fs_info, old_opts, *flags); + ret = btrfs_parse_options(root, data); if (ret) { ret = -EINVAL; @@ -1225,7 +1259,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) fs_info->thread_pool_size, old_thread_pool_size); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) - return 0; + goto out; if (*flags & MS_RDONLY) { /* @@ -1280,7 +1314,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } sb->s_flags &= ~MS_RDONLY; } - +out: + btrfs_remount_cleanup(fs_info, old_opts); return 0; restore: @@ -1297,6 +1332,7 @@ restore: btrfs_resize_thread_pool(fs_info, old_thread_pool_size, fs_info->thread_pool_size); fs_info->metadata_ratio = old_metadata_ratio; + btrfs_remount_cleanup(fs_info, old_opts); return ret; } -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html