This fixes a restriper restore bug that triggered asserts for users with interrupted balances on a btrfs rootfs. Thanks, Ilya Ilya Dryomov (2): Btrfs: restore restriper state on all mounts Btrfs: resume balance on rw (re)mounts properly fs/btrfs/disk-io.c | 39 ++++++++++++++++++---------- fs/btrfs/super.c | 4 +++ fs/btrfs/volumes.c | 73 ++++++++++++++++++++++++++++++++-------------------- fs/btrfs/volumes.h | 3 +- 4 files changed, 76 insertions(+), 43 deletions(-) -- 1.7.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2012-Jun-22 18:24 UTC
[PATCH 1/2] Btrfs: restore restriper state on all mounts
Fix a bug that triggered asserts in btrfs_balance() in both normal and resume modes -- restriper state was not properly restored on read-only mounts. This factors out resuming code from btrfs_restore_balance(), which is now also called earlier in the mount sequence to avoid the problem of some early writes getting the old profile. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/btrfs/disk-io.c | 15 ++++++++++----- fs/btrfs/volumes.c | 39 +++++++++++++++++++-------------------- fs/btrfs/volumes.h | 2 +- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 77872da..dae7cd6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2354,17 +2354,22 @@ retry_root_backup: BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) goto recovery_tree_root; - csum_root->track_dirty = 1; fs_info->generation = generation; fs_info->last_trans_committed = generation; + ret = btrfs_recover_balance(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to recover balance\n"); + goto fail_tree_roots; + } + ret = btrfs_init_dev_stats(fs_info); if (ret) { printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", ret); - goto fail_block_groups; + goto fail_balance_ctl; } ret = btrfs_init_space_info(fs_info); @@ -2492,9 +2497,6 @@ retry_root_backup: err = btrfs_orphan_cleanup(fs_info->tree_root); up_read(&fs_info->cleanup_work_sem); - if (!err) - err = btrfs_recover_balance(fs_info->tree_root); - if (err) { close_ctree(tree_root); return err; @@ -2518,6 +2520,9 @@ fail_cleaner: fail_block_groups: btrfs_free_block_groups(fs_info); +fail_balance_ctl: + kfree(fs_info->balance_ctl); + fail_tree_roots: free_root_pointers(fs_info, 1); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8a3d259..f7649b9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2867,9 +2867,8 @@ static int balance_kthread(void *data) return ret; } -int btrfs_recover_balance(struct btrfs_root *tree_root) +int btrfs_recover_balance(struct 
btrfs_fs_info *fs_info) { - struct task_struct *tsk; struct btrfs_balance_control *bctl; struct btrfs_balance_item *item; struct btrfs_disk_balance_args disk_bargs; @@ -2882,29 +2881,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) if (!path) return -ENOMEM; - bctl = kzalloc(sizeof(*bctl), GFP_NOFS); - if (!bctl) { - ret = -ENOMEM; - goto out; - } - key.objectid = BTRFS_BALANCE_OBJECTID; key.type = BTRFS_BALANCE_ITEM_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); if (ret < 0) - goto out_bctl; + goto out; if (ret > 0) { /* ret = -ENOENT; */ ret = 0; - goto out_bctl; + goto out; + } + + bctl = kzalloc(sizeof(*bctl), GFP_NOFS); + if (!bctl) { + ret = -ENOMEM; + goto out; } leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); - bctl->fs_info = tree_root->fs_info; - bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; + bctl->fs_info = fs_info; + bctl->flags = btrfs_balance_flags(leaf, item); + bctl->flags |= BTRFS_BALANCE_RESUME; btrfs_balance_data(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); @@ -2913,14 +2913,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); - tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); - if (IS_ERR(tsk)) - ret = PTR_ERR(tsk); - else - goto out; + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); -out_bctl: - kfree(bctl); + set_balance_control(bctl); + + mutex_unlock(&fs_info->balance_mutex); + mutex_unlock(&fs_info->volume_mutex); out: btrfs_free_path(path); return ret; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 74366f2..e1b1a64 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -281,7 +281,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 
int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); -int btrfs_recover_balance(struct btrfs_root *tree_root); +int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); -- 1.7.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2012-Jun-22 18:24 UTC
[PATCH 2/2] Btrfs: resume balance on rw (re)mounts properly
This introduces btrfs_resume_balance_async(), which, given that restriper state was recovered earlier by btrfs_recover_balance(), resumes balance in btrfs-balance kthread. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/btrfs/disk-io.c | 24 +++++++++++++++--------- fs/btrfs/super.c | 4 ++++ fs/btrfs/volumes.c | 36 +++++++++++++++++++++++++++--------- fs/btrfs/volumes.h | 1 + 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dae7cd6..e863f58 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2490,17 +2490,23 @@ retry_root_backup: goto fail_trans_kthread; } - if (!(sb->s_flags & MS_RDONLY)) { - down_read(&fs_info->cleanup_work_sem); - err = btrfs_orphan_cleanup(fs_info->fs_root); - if (!err) - err = btrfs_orphan_cleanup(fs_info->tree_root); + if (sb->s_flags & MS_RDONLY) + return 0; + + down_read(&fs_info->cleanup_work_sem); + if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || + (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { up_read(&fs_info->cleanup_work_sem); + close_ctree(tree_root); + return ret; + } + up_read(&fs_info->cleanup_work_sem); - if (err) { - close_ctree(tree_root); - return err; - } + ret = btrfs_resume_balance_async(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to resume balance\n"); + close_ctree(tree_root); + return ret; } return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0eb9a4d..e239915 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; + ret = btrfs_resume_balance_async(fs_info); + if (ret) + goto restore; + sb->s_flags &= ~MS_RDONLY; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f7649b9..0f1778c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2845,28 +2845,46 @@ out: static int balance_kthread(void *data) { - struct btrfs_balance_control *bctl = - (struct btrfs_balance_control *)data; - 
struct btrfs_fs_info *fs_info = bctl->fs_info; + struct btrfs_fs_info *fs_info = data; int ret = 0; mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); - set_balance_control(bctl); - - if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { - printk(KERN_INFO "btrfs: force skipping balance\n"); - } else { + if (fs_info->balance_ctl) { printk(KERN_INFO "btrfs: continuing balance\n"); - ret = btrfs_balance(bctl, NULL); + ret = btrfs_balance(fs_info->balance_ctl, NULL); } mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); + return ret; } +int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) +{ + struct task_struct *tsk; + + spin_lock(&fs_info->balance_lock); + if (!fs_info->balance_ctl) { + spin_unlock(&fs_info->balance_lock); + return 0; + } + spin_unlock(&fs_info->balance_lock); + + if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { + printk(KERN_INFO "btrfs: force skipping balance\n"); + return 0; + } + + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); + if (IS_ERR(tsk)) + return PTR_ERR(tsk); + + return 0; +} + int btrfs_recover_balance(struct btrfs_fs_info *fs_info) { struct btrfs_balance_control *bctl; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index e1b1a64..95f6637 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -281,6 +281,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); +int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); -- 1.7.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
David Sterba
2012-Jun-26 16:17 UTC
Re: [PATCH 1/2] Btrfs: restore restriper state on all mounts
On Fri, Jun 22, 2012 at 09:24:12PM +0300, Ilya Dryomov wrote: > Fix a bug that triggered asserts in btrfs_balance() in both normal and > resume modes -- restriper state was not properly restored on read-only > mounts. This factors out resuming code from btrfs_restore_balance(), > which is now also called earlier in the mount sequence to avoid the > problem of some early writes getting the old profile. > > Signed-off-by: Ilya Dryomov <idryomov@gmail.com> > --- > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 77872da..dae7cd6 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -2492,9 +2497,6 @@ retry_root_backup: > err = btrfs_orphan_cleanup(fs_info->tree_root); > up_read(&fs_info->cleanup_work_sem); > > - if (!err) > - err = btrfs_recover_balance(fs_info->tree_root); > - > if (err) { > close_ctree(tree_root); > return err; > @@ -2518,6 +2520,9 @@ fail_cleaner: > fail_block_groups: > btrfs_free_block_groups(fs_info); > > +fail_balance_ctl: > + kfree(fs_info->balance_ctl); I think you need to set fs_info->balance_ctl to NULL, otherwise this could lead to a double free from free_fs_info. I was looking along the call paths and didn't see free_fs_info called on the mount failure path: vfs->mount btrfs_mount btrfs_fill_super open_ctree (recover balance fails, frees ctl) error is propagated back to vfs, no other fs callback is done (like kill_super which does call free_fs_info). The only exit path that is not going through free_fs_info is after an error from btrfs_fill_super, and this can fail for various reasons. Either I'm missing something, or we leak a btrfs_fs_info every time a mount fails ... Back to your patch, apart from the balance_ctl pointer reset, both are ok and given the number of bug reports [useless padding text here] this should go to 3.5-rc. 
david -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2012-Jun-26 17:35 UTC
Re: [PATCH 1/2] Btrfs: restore restriper state on all mounts
First of all, thanks for reviewing! On Tue, Jun 26, 2012 at 06:17:39PM +0200, David Sterba wrote: > On Fri, Jun 22, 2012 at 09:24:12PM +0300, Ilya Dryomov wrote: > > Fix a bug that triggered asserts in btrfs_balance() in both normal and > > resume modes -- restriper state was not properly restored on read-only > > mounts. This factors out resuming code from btrfs_restore_balance(), > > which is now also called earlier in the mount sequence to avoid the > > problem of some early writes getting the old profile. > > > > Signed-off-by: Ilya Dryomov <idryomov@gmail.com> > > --- > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > > index 77872da..dae7cd6 100644 > > --- a/fs/btrfs/disk-io.c > > +++ b/fs/btrfs/disk-io.c > > @@ -2492,9 +2497,6 @@ retry_root_backup: > > err = btrfs_orphan_cleanup(fs_info->tree_root); > > up_read(&fs_info->cleanup_work_sem); > > > > - if (!err) > > - err = btrfs_recover_balance(fs_info->tree_root); > > - > > if (err) { > > close_ctree(tree_root); > > return err; > > @@ -2518,6 +2520,9 @@ fail_cleaner: > > fail_block_groups: > > btrfs_free_block_groups(fs_info); > > > > +fail_balance_ctl: > > + kfree(fs_info->balance_ctl); > > I think you need to set fs_info->balance_ctl to NULL, otherwise this > could lead to double free from free_fs_info. I was looking along the Yes, I do. I meant to call unset_balance_control(fs_info) there, but changed it to kfree(), because of the BUG_ON() in the former. unset_balance_control(), of course, sets ->balance_ctl to NULL ;) > call paths and didn't see free_fs_info called on the mount failure path: > > vfs->mount > btrfs_mount > btrfs_fill_super > open_ctree > (recover balance fails, frees ctl) > > error is propagated back to vfs, no other fs callback is done (like > kill_super which does call free_fs_info). > > The only exit path that is not going through free_fs_info is after error > from btrfs_fill_super, and this can fail from various reasons. 
> > Either I'm missing something, or we leak a btrfs_fs_info every time a > mount fails ... No, we don't, you just missed it. It's freed from btrfs_kill_super(), which is called from deactivate_locked_super() after btrfs_fill_super() errors out. > > > Back to your patch, apart from the balance_ctl pointer reset, both are > ok and given the number of bug reports [useless padding text here] > > this should go to 3.5-rc. Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html