This fixes a restriper restore bug which triggered asserts for users with
interrupted balances on rootfs btrfs.

Thanks,

Ilya

Ilya Dryomov (2):
  Btrfs: restore restriper state on all mounts
  Btrfs: resume balance on rw (re)mounts properly

 fs/btrfs/disk-io.c |   39 ++++++++++++++++++----------
 fs/btrfs/super.c   |    4 +++
 fs/btrfs/volumes.c |   73 ++++++++++++++++++++++++++++++++--------------------
 fs/btrfs/volumes.h |    3 +-
 4 files changed, 76 insertions(+), 43 deletions(-)

--
1.7.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2012-Jun-22 18:24 UTC
[PATCH 1/2] Btrfs: restore restriper state on all mounts
Fix a bug that triggered asserts in btrfs_balance() in both normal and
resume modes -- restriper state was not properly restored on read-only
mounts. This factors out resuming code from btrfs_recover_balance(),
which is now also called earlier in the mount sequence to avoid the
problem of some early writes getting the old profile.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
fs/btrfs/disk-io.c | 15 ++++++++++-----
fs/btrfs/volumes.c | 39 +++++++++++++++++++--------------------
fs/btrfs/volumes.h | 2 +-
3 files changed, 30 insertions(+), 26 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 77872da..dae7cd6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,17 +2354,22 @@ retry_root_backup:
BTRFS_CSUM_TREE_OBJECTID, csum_root);
if (ret)
goto recovery_tree_root;
-
csum_root->track_dirty = 1;
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
+ ret = btrfs_recover_balance(fs_info);
+ if (ret) {
+ printk(KERN_WARNING "btrfs: failed to recover balance\n");
+ goto fail_tree_roots;
+ }
+
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
ret);
- goto fail_block_groups;
+ goto fail_balance_ctl;
}
ret = btrfs_init_space_info(fs_info);
@@ -2492,9 +2497,6 @@ retry_root_backup:
err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
- if (!err)
- err = btrfs_recover_balance(fs_info->tree_root);
-
if (err) {
close_ctree(tree_root);
return err;
@@ -2518,6 +2520,9 @@ fail_cleaner:
fail_block_groups:
btrfs_free_block_groups(fs_info);
+fail_balance_ctl:
+ kfree(fs_info->balance_ctl);
+
fail_tree_roots:
free_root_pointers(fs_info, 1);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d259..f7649b9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2867,9 +2867,8 @@ static int balance_kthread(void *data)
return ret;
}
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
{
- struct task_struct *tsk;
struct btrfs_balance_control *bctl;
struct btrfs_balance_item *item;
struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2881,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
if (!path)
return -ENOMEM;
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
- if (!bctl) {
- ret = -ENOMEM;
- goto out;
- }
-
key.objectid = BTRFS_BALANCE_OBJECTID;
key.type = BTRFS_BALANCE_ITEM_KEY;
key.offset = 0;
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
- goto out_bctl;
+ goto out;
if (ret > 0) { /* ret = -ENOENT; */
ret = 0;
- goto out_bctl;
+ goto out;
+ }
+
+ bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ if (!bctl) {
+ ret = -ENOMEM;
+ goto out;
}
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
- bctl->fs_info = tree_root->fs_info;
- bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+ bctl->fs_info = fs_info;
+ bctl->flags = btrfs_balance_flags(leaf, item);
+ bctl->flags |= BTRFS_BALANCE_RESUME;
btrfs_balance_data(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2913,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
- if (IS_ERR(tsk))
- ret = PTR_ERR(tsk);
- else
- goto out;
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
-out_bctl:
- kfree(bctl);
+ set_balance_control(bctl);
+
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
out:
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f2..e1b1a64 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
--
1.7.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2012-Jun-22 18:24 UTC
[PATCH 2/2] Btrfs: resume balance on rw (re)mounts properly
This introduces btrfs_resume_balance_async(), which, given that
restriper state was recovered earlier by btrfs_recover_balance(),
resumes balance in btrfs-balance kthread.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
fs/btrfs/disk-io.c | 24 +++++++++++++++---------
fs/btrfs/super.c | 4 ++++
fs/btrfs/volumes.c | 36 +++++++++++++++++++++++++++---------
fs/btrfs/volumes.h | 1 +
4 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dae7cd6..e863f58 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2490,17 +2490,23 @@ retry_root_backup:
goto fail_trans_kthread;
}
- if (!(sb->s_flags & MS_RDONLY)) {
- down_read(&fs_info->cleanup_work_sem);
- err = btrfs_orphan_cleanup(fs_info->fs_root);
- if (!err)
- err = btrfs_orphan_cleanup(fs_info->tree_root);
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
+
+ down_read(&fs_info->cleanup_work_sem);
+ if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+ (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
up_read(&fs_info->cleanup_work_sem);
+ close_ctree(tree_root);
+ return ret;
+ }
+ up_read(&fs_info->cleanup_work_sem);
- if (err) {
- close_ctree(tree_root);
- return err;
- }
+ ret = btrfs_resume_balance_async(fs_info);
+ if (ret) {
+ printk(KERN_WARNING "btrfs: failed to resume balance\n");
+ close_ctree(tree_root);
+ return ret;
}
return 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4d..e239915 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
+ ret = btrfs_resume_balance_async(fs_info);
+ if (ret)
+ goto restore;
+
sb->s_flags &= ~MS_RDONLY;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f7649b9..0f1778c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,28 +2845,46 @@ out:
static int balance_kthread(void *data)
{
- struct btrfs_balance_control *bctl =
- (struct btrfs_balance_control *)data;
- struct btrfs_fs_info *fs_info = bctl->fs_info;
+ struct btrfs_fs_info *fs_info = data;
int ret = 0;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
- set_balance_control(bctl);
-
- if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
- printk(KERN_INFO "btrfs: force skipping balance\n");
- } else {
+ if (fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: continuing balance\n");
- ret = btrfs_balance(bctl, NULL);
+ ret = btrfs_balance(fs_info->balance_ctl, NULL);
}
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
+
return ret;
}
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
+{
+ struct task_struct *tsk;
+
+ spin_lock(&fs_info->balance_lock);
+ if (!fs_info->balance_ctl) {
+ spin_unlock(&fs_info->balance_lock);
+ return 0;
+ }
+ spin_unlock(&fs_info->balance_lock);
+
+ if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+ printk(KERN_INFO "btrfs: force skipping balance\n");
+ return 0;
+ }
+
+ tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+ if (IS_ERR(tsk))
+ return PTR_ERR(tsk);
+
+ return 0;
+}
+
int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
{
struct btrfs_balance_control *bctl;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index e1b1a64..95f6637 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,6 +281,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
--
1.7.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
David Sterba
2012-Jun-26 16:17 UTC
Re: [PATCH 1/2] Btrfs: restore restriper state on all mounts
On Fri, Jun 22, 2012 at 09:24:12PM +0300, Ilya Dryomov wrote:
> Fix a bug that triggered asserts in btrfs_balance() in both normal and
> resume modes -- restriper state was not properly restored on read-only
> mounts. This factors out resuming code from btrfs_restore_balance(),
> which is now also called earlier in the mount sequence to avoid the
> problem of some early writes getting the old profile.
>
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 77872da..dae7cd6 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2492,9 +2497,6 @@ retry_root_backup:
> err = btrfs_orphan_cleanup(fs_info->tree_root);
> up_read(&fs_info->cleanup_work_sem);
>
> - if (!err)
> - err = btrfs_recover_balance(fs_info->tree_root);
> -
> if (err) {
> close_ctree(tree_root);
> return err;
> @@ -2518,6 +2520,9 @@ fail_cleaner:
> fail_block_groups:
> btrfs_free_block_groups(fs_info);
>
> +fail_balance_ctl:
> + kfree(fs_info->balance_ctl);

I think you need to set fs_info->balance_ctl to NULL, otherwise this
could lead to double free from free_fs_info. I was looking along the
call paths and didn't see free_fs_info called on the mount failure path:

vfs->mount
  btrfs_mount
    btrfs_fill_super
      open_ctree
        (recover balance fails, frees ctl)

error is propagated back to vfs, no other fs callback is done (like
kill_super which does call free_fs_info).

The only exit path that is not going through free_fs_info is after error
from btrfs_fill_super, and this can fail from various reasons.

Either I'm missing something, or we leak a btrfs_fs_info every time a
mount fails ...

Back to your patch, apart from the balance_ctl pointer reset, both are
ok and given the number of bug reports [useless padding text here]

this should go to 3.5-rc.

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2012-Jun-26 17:35 UTC
Re: [PATCH 1/2] Btrfs: restore restriper state on all mounts
First of all, thanks for reviewing!

On Tue, Jun 26, 2012 at 06:17:39PM +0200, David Sterba wrote:
> On Fri, Jun 22, 2012 at 09:24:12PM +0300, Ilya Dryomov wrote:
> > Fix a bug that triggered asserts in btrfs_balance() in both normal and
> > resume modes -- restriper state was not properly restored on read-only
> > mounts. This factors out resuming code from btrfs_restore_balance(),
> > which is now also called earlier in the mount sequence to avoid the
> > problem of some early writes getting the old profile.
> >
> > Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> > ---
> > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> > index 77872da..dae7cd6 100644
> > --- a/fs/btrfs/disk-io.c
> > +++ b/fs/btrfs/disk-io.c
> > @@ -2492,9 +2497,6 @@ retry_root_backup:
> > err = btrfs_orphan_cleanup(fs_info->tree_root);
> > up_read(&fs_info->cleanup_work_sem);
> >
> > - if (!err)
> > - err = btrfs_recover_balance(fs_info->tree_root);
> > -
> > if (err) {
> > close_ctree(tree_root);
> > return err;
> > @@ -2518,6 +2520,9 @@ fail_cleaner:
> > fail_block_groups:
> > btrfs_free_block_groups(fs_info);
> >
> > +fail_balance_ctl:
> > + kfree(fs_info->balance_ctl);
>
> I think you need to set fs_info->balance_ctl to NULL, otherwise this
> could lead to double free from free_fs_info. I was looking along the

Yes, I do. I meant to call unset_balance_control(fs_info) there, but
changed it to kfree(), because of the BUG_ON() in the former.
unset_balance_control(), of course, sets ->balance_ctl to NULL ;)

> call paths and didn't see free_fs_info called on the mount failure path:
>
> vfs->mount
>   btrfs_mount
>     btrfs_fill_super
>       open_ctree
>         (recover balance fails, frees ctl)
>
> error is propagated back to vfs, no other fs callback is done (like
> kill_super which does call free_fs_info).
>
> The only exit path that is not going through free_fs_info is after error
> from btrfs_fill_super, and this can fail from various reasons.
>
> Either I'm missing something, or we leak a btrfs_fs_info every time a
> mount fails ...

No, we don't, you just missed it. It's freed from btrfs_kill_super(),
which is called from deactivate_locked_super() after btrfs_fill_super()
errors out.

>
>
> Back to your patch, apart from the balance_ctl pointer reset, both are
> ok and given the number of bug reports [useless padding text here]
>
> this should go to 3.5-rc.

Thanks,

Ilya
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html