This patchset fixes some bugs of btrfs_ioctl(). Most of them (0004-0009) is relative to write access permission. In these patches, the last patch(0009) modified the original process of the device insertion,and made it follow the rule of the read-only filesystem. It might be issuable, so mark it RFC. We can pull this patchset from the URL git://github.com/miaoxie/linux-btrfs.git readonly Thanks Miao --- Miao Xie (9): Btrfs: pass root object into btrfs_ioctl_{start, wait}_sync() Btrfs: don''t start a new transaction when starting sync Btrfs: fix wrong return value of btrfs_wait_for_commit() Btrfs: get write access when setting the default subvolume Btrfs: get write access when doing resize fs Btrfs: get write access when removing a device Btrfs: get write access for scrub Btrfs: get write access for qgroup operations Btrfs: get write access for adding device (RFC PATCH) fs/btrfs/ioctl.c | 260 ++++++++++++++++++++++++++++++++++++------------- fs/btrfs/super.c | 5 +- fs/btrfs/transaction.c | 28 +++--- fs/btrfs/volumes.c | 26 ++--- fs/btrfs/volumes.h | 3 +- 5 files changed, 219 insertions(+), 103 deletions(-) -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Miao Xie
2012-Nov-26 08:40 UTC
[PATCH 1/9] Btrfs: pass root object into btrfs_ioctl_{start, wait}_sync()
Since we have gotten the root in the caller, just pass it into btrfs_ioctl_{start, wait}_sync() directly. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 84bb4de..fd3fff5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3038,9 +3038,9 @@ long btrfs_ioctl_trans_end(struct file *file) return 0; } -static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) +static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, + void __user *argp) { - struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; struct btrfs_trans_handle *trans; u64 transid; int ret; @@ -3061,9 +3061,9 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp return 0; } -static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) +static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, + void __user *argp) { - struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; u64 transid; if (argp) { @@ -3770,9 +3770,9 @@ long btrfs_ioctl(struct file *file, unsigned int btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; case BTRFS_IOC_START_SYNC: - return btrfs_ioctl_start_sync(file, argp); + return btrfs_ioctl_start_sync(root, argp); case BTRFS_IOC_WAIT_SYNC: - return btrfs_ioctl_wait_sync(file, argp); + return btrfs_ioctl_wait_sync(root, argp); case BTRFS_IOC_SCRUB: return btrfs_ioctl_scrub(root, argp); case BTRFS_IOC_SCRUB_CANCEL: -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Miao Xie
2012-Nov-26 08:41 UTC
[PATCH 2/9] Btrfs: don''t start a new transaction when starting sync
If there is no running transaction in the fs, we needn''t start a new one when we want to start sync. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 14 ++++++++++---- fs/btrfs/transaction.c | 13 ++++++++----- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd3fff5..5102747 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3045,16 +3045,22 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, u64 transid; int ret; - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + return PTR_ERR(trans); + + /* No running transaction, don''t bother */ + transid = root->fs_info->last_trans_committed; + goto out; + } transid = trans->transid; ret = btrfs_commit_transaction_async(trans, root, 0); if (ret) { btrfs_end_transaction(trans, root); return ret; } - +out: if (argp) if (copy_to_user(argp, &transid, sizeof(transid))) return -EFAULT; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 04bbfb1..350e9c4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1309,9 +1309,10 @@ static void do_async_commit(struct work_struct *work) * We''ve got freeze protection passed with the transaction. * Tell lockdep about it. */ - rwsem_acquire_read( - &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); + if (ac->newtrans->type < TRANS_JOIN_NOLOCK) + rwsem_acquire_read( + &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); current->journal_info = ac->newtrans; @@ -1349,8 +1350,10 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * Tell lockdep we''ve released the freeze rwsem, since the * async commit thread will be the one to unlock it. */ - rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 1, _THIS_IP_); + if (trans->type < TRANS_JOIN_NOLOCK) + rwsem_release( + &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], + 1, _THIS_IP_); schedule_delayed_work(&ac->work, 0); -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Miao Xie
2012-Nov-26 08:42 UTC
[PATCH 3/9] Btrfs: fix wrong return value of btrfs_wait_for_commit()
If the id of the existed transaction is more than the one we specified, it means the specified transaction was commited, so we should return 0, not EINVAL. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/transaction.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 350e9c4..788e079 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -461,28 +461,31 @@ static noinline void wait_for_commit(struct btrfs_root *root, int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) { struct btrfs_transaction *cur_trans = NULL, *t; - int ret; + int ret = 0; - ret = 0; if (transid) { if (transid <= root->fs_info->last_trans_committed) goto out; + ret = -EINVAL; /* find specified transaction */ spin_lock(&root->fs_info->trans_lock); list_for_each_entry(t, &root->fs_info->trans_list, list) { if (t->transid == transid) { cur_trans = t; atomic_inc(&cur_trans->use_count); + ret = 0; break; } - if (t->transid > transid) + if (t->transid > transid) { + ret = 0; break; + } } spin_unlock(&root->fs_info->trans_lock); - ret = -EINVAL; + /* The specified transaction doesn''t exist */ if (!cur_trans) - goto out; /* bad transid */ + goto out; } else { /* find newest transaction that is committing | committed */ spin_lock(&root->fs_info->trans_lock); @@ -502,9 +505,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) } wait_for_commit(root, cur_trans); - put_transaction(cur_trans); - ret = 0; out: return ret; } -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Miao Xie
2012-Nov-26 08:43 UTC
[PATCH 4/9] Btrfs: get write access when setting the default subvolume
When wen want to set the default subvolume, we must get write access, or we will change the R/O file system. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5102747..7d0214b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2823,12 +2823,19 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) struct btrfs_disk_key disk_key; u64 objectid = 0; u64 dir_id; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&objectid, argp, sizeof(objectid))) - return -EFAULT; + ret = mnt_want_write_file(file); + if (ret) + return ret; + + if (copy_from_user(&objectid, argp, sizeof(objectid))) { + ret = -EFAULT; + goto out; + } if (!objectid) objectid = root->root_key.objectid; @@ -2838,21 +2845,28 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) location.offset = (u64)-1; new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); - if (IS_ERR(new_root)) - return PTR_ERR(new_root); + if (IS_ERR(new_root)) { + ret = PTR_ERR(new_root); + goto out; + } - if (btrfs_root_refs(&new_root->root_item) == 0) - return -ENOENT; + if (btrfs_root_refs(&new_root->root_item) == 0) { + ret = -ENOENT; + goto out; + } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } path->leave_spinning = 1; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { btrfs_free_path(path); - return PTR_ERR(trans); + ret = PTR_ERR(trans); + goto out; } dir_id = btrfs_super_root_dir(root->fs_info->super_copy); @@ -2863,7 +2877,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_end_transaction(trans, root); printk(KERN_ERR "Umm, you don''t have the default dir item, " "this isn''t going to work\n"); - return -ENOENT; + ret = -ENOENT; + goto out; } btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); @@ -2873,8 +2888,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); btrfs_end_transaction(trans, root); - - return 0; +out: + mnt_drop_write_file(file); + return ret; } void btrfs_get_block_group_info(struct list_head *groups_list, -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Steps to reproduce: # mkfs.btrfs <partition> # mount -o ro <partition> <mnt0> # mount -o ro <partition> <mnt1> # mount -o remount,rw <mnt0> # umount <mnt0> # btrfs fi resize 10g <mnt1> We re-sized a R/O filesystem. The reason is that we just check the R/O flag of the super block object. It is not enough, because the kernel may set the R/O flag only for the mount point. We need invoke mnt_want_write_file() to do a full check. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7d0214b..c4bcba5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1293,12 +1293,13 @@ out_ra: return ret; } -static noinline int btrfs_ioctl_resize(struct btrfs_root *root, +static noinline int btrfs_ioctl_resize(struct file *file, void __user *arg) { u64 new_size; u64 old_size; u64 devid = 1; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_trans_handle *trans; struct btrfs_device *device = NULL; @@ -1307,12 +1308,13 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, int ret = 0; int mod = 0; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; + ret = mnt_want_write_file(file); + if (ret) + return ret; + mutex_lock(&root->fs_info->volume_mutex); if (root->fs_info->balance_ctl) { printk(KERN_INFO "btrfs: balance in progress\n"); @@ -1415,6 +1417,7 @@ out_free: kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + mnt_drop_write_file(file); return ret; } @@ -3759,7 +3762,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEFRAG_RANGE: return btrfs_ioctl_defrag(file, argp); case BTRFS_IOC_RESIZE: - return btrfs_ioctl_resize(root, argp); + return btrfs_ioctl_resize(file, argp); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, argp); case BTRFS_IOC_RM_DEV: -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Steps to reproduce: # mkfs.btrfs -d single -m single <disk0> <disk1> # mount -o ro <disk0> <mnt0> # mount -o ro <disk0> <mnt1> # mount -o remount,rw <mnt0> # umount <mnt0> # btrfs device delete <disk1> <mnt1> We can remove a device from a R/O filesystem. The reason is that we just check the R/O flag of the super block object. It is not enough, because the kernel may set the R/O flag only for the mount point. We need invoke mnt_want_write_file() to do a full check. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c4bcba5..5921bb9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2248,16 +2248,18 @@ out: return ret; } -static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; mutex_lock(&root->fs_info->volume_mutex); if (root->fs_info->balance_ctl) { @@ -2278,6 +2280,7 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + mnt_drop_write_file(file); return ret; } @@ -3766,7 +3769,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, argp); case BTRFS_IOC_RM_DEV: - return btrfs_ioctl_rm_dev(root, argp); + return btrfs_ioctl_rm_dev(file, argp); case BTRFS_IOC_FS_INFO: return btrfs_ioctl_fs_info(root, argp); case BTRFS_IOC_DEV_INFO: -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
We need get write access for scrub, or we will modify the R/O fs. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5921bb9..a7afafa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3103,10 +3103,11 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, return btrfs_wait_for_commit(root, transid); } -static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_scrub(struct file *file, void __user *arg) { - int ret; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_scrub_args *sa; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3115,12 +3116,21 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) if (IS_ERR(sa)) return PTR_ERR(sa); + if (!(sa->flags & BTRFS_SCRUB_READONLY)) { + ret = mnt_want_write_file(file); + if (ret) + goto out; + } + ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end, &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; + if (!(sa->flags & BTRFS_SCRUB_READONLY)) + mnt_drop_write_file(file); +out: kfree(sa); return ret; } @@ -3802,7 +3812,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_WAIT_SYNC: return btrfs_ioctl_wait_sync(root, argp); case BTRFS_IOC_SCRUB: - return btrfs_ioctl_scrub(root, argp); + return btrfs_ioctl_scrub(file, argp); case BTRFS_IOC_SCRUB_CANCEL: return btrfs_ioctl_scrub_cancel(root, argp); case BTRFS_IOC_SCRUB_PROGRESS: -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
We need get write access for qgroup operations, or we will modify the R/O fs. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 73 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a7afafa..e6525d1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3481,8 +3481,9 @@ out: return ret; } -static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_quota_ctl_args *sa; struct btrfs_trans_handle *trans = NULL; int ret; @@ -3491,12 +3492,15 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { trans = btrfs_start_transaction(root, 2); @@ -3529,14 +3533,16 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) if (err && !ret) ret = err; } - out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } -static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_qgroup_assign_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3545,12 +3551,15 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3573,11 +3582,14 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } -static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_qgroup_create_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3586,12 +3598,15 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3613,11 +3628,14 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } -static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_qgroup_limit_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3627,12 +3645,15 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3655,6 +3676,8 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } @@ -3830,13 +3853,13 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(root, argp); case BTRFS_IOC_QUOTA_CTL: - return btrfs_ioctl_quota_ctl(root, argp); + return btrfs_ioctl_quota_ctl(file, argp); case BTRFS_IOC_QGROUP_ASSIGN: - return btrfs_ioctl_qgroup_assign(root, argp); + return btrfs_ioctl_qgroup_assign(file, argp); case BTRFS_IOC_QGROUP_CREATE: - return btrfs_ioctl_qgroup_create(root, argp); + return btrfs_ioctl_qgroup_create(file, argp); case BTRFS_IOC_QGROUP_LIMIT: - return btrfs_ioctl_qgroup_limit(root, argp); + return btrfs_ioctl_qgroup_limit(file, argp); } return -ENOTTY; -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Without this patch, we can add a device into the R/O fs. Steps to reproduce: # mkfs.btrfs -d single -m single <disk0> <disk1> # mount -o ro <disk0> <mnt0> # mount -o ro <disk0> <mnt1> # mount -o remount,rw <mnt0> # umount <mnt0> # btrfs device add <disk1> <mnt1> It is because we just check the R/O flag of the super block object. It is not enough, because the kernel may set the R/O flag only for the mount point. We need invoke mnt_want_write_file() to do the full check. But, this interface is also used for the seed fs, so I split it to two functions, one is used for the common fs, and the other is for the seed fs. The detail fix method is following: - When we mount a seed fs, we doesn''t return -EACCESS. Instead, we just set R/O flag for the super block, and tell the user that "the fs is a seed fs, we mount it on R/O state" by printk - If the fs is not a seed fs, we invoke the common device add function, which will call mnt_want_write_file() at first and get write access. - If the fs is a seed fs, we will check the flags of the mount point. - If it is R/O, it means the users mount the fs on R/O state on their own initiative, in other words, the users don''t want to change anything include adding a new device. So in this case, we will return -EROFS. - If the R/O flag of the mount point is not set, we will make a new fs which is based on the seed fs and add the device into it. At the end of this process, we will clear R/O flag of the super block, and then we can access the new fs freely. In this way, - We will forbid adding/removing any device to/from a filesystem which is mounted on R/O state(mount -o ro), even it is a seed filesystem. In short, the new process follows the read-only rule more strictly. - We can modify the new filesystem which is based on a seed filesystem immediately after it is created, needn''t do remount/umount-mount. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/ioctl.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/super.c | 5 ++-- fs/btrfs/volumes.c | 26 ++++++------------ fs/btrfs/volumes.h | 3 +- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e6525d1..650d82d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2218,13 +2218,14 @@ out: return ret; } -static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) +static int btrfs_common_add_device(struct file *file, struct btrfs_root *root, + const char *devname) { - struct btrfs_ioctl_vol_args *vol_args; int ret; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = mnt_want_write_file(file); + if (ret) + return ret; mutex_lock(&root->fs_info->volume_mutex); if (root->fs_info->balance_ctl) { @@ -2233,18 +2234,76 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) goto out; } - vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) { - ret = PTR_ERR(vol_args); + ret = btrfs_init_new_device(root, devname, false); +out: + mutex_unlock(&root->fs_info->volume_mutex); + return ret; +} + +static int btrfs_seed_add_device(struct file *file, struct btrfs_root *root, + const char *devname) +{ + struct super_block *sb = root->fs_info->sb; + int ret; + + sb_start_write(sb); + + mutex_lock(&root->fs_info->volume_mutex); + /* + * Some one may add a new device into a seed fs, and make a + * new fs, so let''s add the device by the common method. + */ + if (!root->fs_info->fs_devices->seeding) { + mutex_unlock(&root->fs_info->volume_mutex); + sb_end_write(sb); + return btrfs_common_add_device(file, root, devname); + } + + down_write(&sb->s_umount); + if (file->f_path.mnt->mnt_flags & MNT_READONLY) { + ret = -EROFS; goto out; } - vol_args->name[BTRFS_PATH_NAME_MAX] = ''\0''; - ret = btrfs_init_new_device(root, vol_args->name); + if (root->fs_info->balance_ctl) { + printk(KERN_INFO "btrfs: balance in progress\n"); + ret = -EINVAL; + goto out; + } - kfree(vol_args); + ret = btrfs_init_new_device(root, devname, true); + if (!ret) { + sb->s_flags &= ~MS_RDONLY; + printk(KERN_INFO "Btrfs created a new filesystem based on " + "seed filesystem."); + } out: + up_write(&sb->s_umount); mutex_unlock(&root->fs_info->volume_mutex); + sb_end_write(sb); + return ret; +} + +static long btrfs_ioctl_add_dev(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_PATH_NAME_MAX] = ''\0''; + + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_SEEDING) + ret = btrfs_seed_add_device(file, root, vol_args->name); + else + ret = btrfs_common_add_device(file, root, vol_args->name); + + kfree(vol_args); return ret; } @@ -3800,7 +3859,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(file, argp); case BTRFS_IOC_ADD_DEV: - return btrfs_ioctl_add_dev(root, argp); + return btrfs_ioctl_add_dev(file, argp); case BTRFS_IOC_RM_DEV: return btrfs_ioctl_rm_dev(file, argp); case BTRFS_IOC_FS_INFO: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 915ac14..621fd3c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1116,8 +1116,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, goto error_fs_info; if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { - error = -EACCES; - goto error_close_devices; + flags |= MS_RDONLY; + printk(KERN_INFO "Btrfs detected seed filesystem, force it to " + "be READONLY\n"); } bdev = fs_devices->latest_bdev; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0f5ebb7..66d6dd6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1568,6 +1568,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); + root->fs_info->fs_state &= ~BTRFS_SUPER_FLAG_SEEDING; return 0; } @@ -1648,32 +1649,25 @@ error: return ret; } -int btrfs_init_new_device(struct btrfs_root *root, char *device_path) +int btrfs_init_new_device(struct btrfs_root *root, const char *device_path, + bool seed_fs) { struct request_queue *q; struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; struct list_head *devices; - struct super_block *sb = root->fs_info->sb; struct rcu_string *name; u64 total_bytes; - int seeding_dev = 0; int ret = 0; - if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) - return -EROFS; - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder); if (IS_ERR(bdev)) return PTR_ERR(bdev); - if (root->fs_info->fs_devices->seeding) { - seeding_dev = 1; - down_write(&sb->s_umount); + if (seed_fs) mutex_lock(&uuid_mutex); - } filemap_write_and_wait(bdev->bd_inode->i_mapping); @@ -1740,8 +1734,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->mode = FMODE_EXCL; set_blocksize(device->bdev, 4096); - if (seeding_dev) { - sb->s_flags &= ~MS_RDONLY; + if (seed_fs) { ret = btrfs_prepare_sprout(root); BUG_ON(ret); /* -ENOMEM */ } @@ -1776,7 +1769,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) total_bytes + 1); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - if (seeding_dev) { + if (seed_fs) { ret = init_first_rw_device(trans, root, device); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -1806,9 +1799,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); ret = btrfs_commit_transaction(trans, root); - if (seeding_dev) { + if (seed_fs) { mutex_unlock(&uuid_mutex); - up_write(&sb->s_umount); if (ret) /* transaction commit */ return ret; @@ -1837,10 +1829,8 @@ error_trans: kfree(device); error: blkdev_put(bdev, FMODE_EXCL); - if (seeding_dev) { + if (seed_fs) mutex_unlock(&uuid_mutex); - up_write(&sb->s_umount); - } return ret; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 53c06af..a76d598 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -279,7 +279,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid, u8 *fsid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); -int btrfs_init_new_device(struct btrfs_root *root, char *path); +int btrfs_init_new_device(struct btrfs_root *root, const char *path, + bool seed_fs); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Miao Xie
2012-Dec-12 02:52 UTC
Re: [RFC PATCH 9/9] Btrfs: get write access for adding device
Any comment about this patch? On mon, 26 Nov 2012 16:51:36 +0800, Miao Xie wrote:> Without this patch, we can add a device into the R/O fs. > > Steps to reproduce: > # mkfs.btrfs -d single -m single <disk0> <disk1> > # mount -o ro <disk0> <mnt0> > # mount -o ro <disk0> <mnt1> > # mount -o remount,rw <mnt0> > # umount <mnt0> > # btrfs device add <disk1> <mnt1> > > It is because we just check the R/O flag of the super block object. > It is not enough, because the kernel may set the R/O flag only for > the mount point. We need invoke > > mnt_want_write_file() > > to do the full check. > > But, this interface is also used for the seed fs, so I split it to > two functions, one is used for the common fs, and the other is for > the seed fs. The detail fix method is following: > > - When we mount a seed fs, we doesn''t return -EACCESS. Instead, we just > set R/O flag for the super block, and tell the user that "the fs is a > seed fs, we mount it on R/O state" by printk > - If the fs is not a seed fs, we invoke the common device add function, > which will call mnt_want_write_file() at first and get write access. > - If the fs is a seed fs, we will check the flags of the mount point. > - If it is R/O, it means the users mount the fs on R/O state on their > own initiative, in other words, the users don''t want to change anything > include adding a new device. So in this case, we will return -EROFS. > - If the R/O flag of the mount point is not set, we will make a new fs which > is based on the seed fs and add the device into it. At the end of this > process, we will clear R/O flag of the super block, and then we can > access the new fs freely. > > In this way, > - We will forbid adding/removing any device to/from a filesystem which is > mounted on R/O state(mount -o ro), even it is a seed filesystem. In short, > the new process follows the read-only rule more strictly. > - We can modify the new filesystem which is based on a seed filesystem > immediately after it is created, needn''t do remount/umount-mount. > > Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> > --- > fs/btrfs/ioctl.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-------- > fs/btrfs/super.c | 5 ++-- > fs/btrfs/volumes.c | 26 ++++++------------ > fs/btrfs/volumes.h | 3 +- > 4 files changed, 83 insertions(+), 32 deletions(-) > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index e6525d1..650d82d 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -2218,13 +2218,14 @@ out: > return ret; > } > > -static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) > +static int btrfs_common_add_device(struct file *file, struct btrfs_root *root, > + const char *devname) > { > - struct btrfs_ioctl_vol_args *vol_args; > int ret; > > - if (!capable(CAP_SYS_ADMIN)) > - return -EPERM; > + ret = mnt_want_write_file(file); > + if (ret) > + return ret; > > mutex_lock(&root->fs_info->volume_mutex); > if (root->fs_info->balance_ctl) { > @@ -2233,18 +2234,76 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) > goto out; > } > > - vol_args = memdup_user(arg, sizeof(*vol_args)); > - if (IS_ERR(vol_args)) { > - ret = PTR_ERR(vol_args); > + ret = btrfs_init_new_device(root, devname, false); > +out: > + mutex_unlock(&root->fs_info->volume_mutex); > + return ret; > +} > + > +static int btrfs_seed_add_device(struct file *file, struct btrfs_root *root, > + const char *devname) > +{ > + struct super_block *sb = root->fs_info->sb; > + int ret; > + > + sb_start_write(sb); > + > + mutex_lock(&root->fs_info->volume_mutex); > + /* > + * Some one may add a new device into a seed fs, and make a > + * new fs, so let''s add the device by the common method. > + */ > + if (!root->fs_info->fs_devices->seeding) { > + mutex_unlock(&root->fs_info->volume_mutex); > + sb_end_write(sb); > + return btrfs_common_add_device(file, root, devname); > + } > + > + down_write(&sb->s_umount); > + if (file->f_path.mnt->mnt_flags & MNT_READONLY) { > + ret = -EROFS; > goto out; > } > > - vol_args->name[BTRFS_PATH_NAME_MAX] = ''\0''; > - ret = btrfs_init_new_device(root, vol_args->name); > + if (root->fs_info->balance_ctl) { > + printk(KERN_INFO "btrfs: balance in progress\n"); > + ret = -EINVAL; > + goto out; > + } > > - kfree(vol_args); > + ret = btrfs_init_new_device(root, devname, true); > + if (!ret) { > + sb->s_flags &= ~MS_RDONLY; > + printk(KERN_INFO "Btrfs created a new filesystem based on " > + "seed filesystem."); > + } > out: > + up_write(&sb->s_umount); > mutex_unlock(&root->fs_info->volume_mutex); > + sb_end_write(sb); > + return ret; > +} > + > +static long btrfs_ioctl_add_dev(struct file *file, void __user *arg) > +{ > + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; > + struct btrfs_ioctl_vol_args *vol_args; > + int ret; > + > + if (!capable(CAP_SYS_ADMIN)) > + return -EPERM; > + > + vol_args = memdup_user(arg, sizeof(*vol_args)); > + if (IS_ERR(vol_args)) > + return PTR_ERR(vol_args); > + vol_args->name[BTRFS_PATH_NAME_MAX] = ''\0''; > + > + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_SEEDING) > + ret = btrfs_seed_add_device(file, root, vol_args->name); > + else > + ret = btrfs_common_add_device(file, root, vol_args->name); > + > + kfree(vol_args); > return ret; > } > > @@ -3800,7 +3859,7 @@ long btrfs_ioctl(struct file *file, unsigned int > case BTRFS_IOC_RESIZE: > return btrfs_ioctl_resize(file, argp); > case BTRFS_IOC_ADD_DEV: > - return btrfs_ioctl_add_dev(root, argp); > + return btrfs_ioctl_add_dev(file, argp); > case BTRFS_IOC_RM_DEV: > return btrfs_ioctl_rm_dev(file, argp); > case BTRFS_IOC_FS_INFO: > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index 915ac14..621fd3c 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -1116,8 +1116,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, > goto error_fs_info; > > if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { > - error = -EACCES; > - goto error_close_devices; > + flags |= MS_RDONLY; > + printk(KERN_INFO "Btrfs detected seed filesystem, force it to " > + "be READONLY\n"); > } > > bdev = fs_devices->latest_bdev; > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 0f5ebb7..66d6dd6 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -1568,6 +1568,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) > super_flags = btrfs_super_flags(disk_super) & > ~BTRFS_SUPER_FLAG_SEEDING; > btrfs_set_super_flags(disk_super, super_flags); > + root->fs_info->fs_state &= ~BTRFS_SUPER_FLAG_SEEDING; > > return 0; > } > @@ -1648,32 +1649,25 @@ error: > return ret; > } > > -int btrfs_init_new_device(struct btrfs_root *root, char *device_path) > +int btrfs_init_new_device(struct btrfs_root *root, const char *device_path, > + bool seed_fs) > { > struct request_queue *q; > struct btrfs_trans_handle *trans; > struct btrfs_device *device; > struct block_device *bdev; > struct list_head *devices; > - struct super_block *sb = root->fs_info->sb; > struct rcu_string *name; > u64 total_bytes; > - int seeding_dev = 0; > int ret = 0; > > - if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) > - return -EROFS; > - > bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, > root->fs_info->bdev_holder); > if (IS_ERR(bdev)) > return PTR_ERR(bdev); > > - if (root->fs_info->fs_devices->seeding) { > - seeding_dev = 1; > - down_write(&sb->s_umount); > + if (seed_fs) > mutex_lock(&uuid_mutex); > - } > > filemap_write_and_wait(bdev->bd_inode->i_mapping); > > @@ -1740,8 +1734,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) > device->mode = FMODE_EXCL; > set_blocksize(device->bdev, 4096); > > - if (seeding_dev) { > - sb->s_flags &= ~MS_RDONLY; > + if (seed_fs) { > ret = btrfs_prepare_sprout(root); > BUG_ON(ret); /* -ENOMEM */ > } > @@ -1776,7 +1769,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) > total_bytes + 1); > mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); > > - if (seeding_dev) { > + if (seed_fs) { > ret = init_first_rw_device(trans, root, device); > if (ret) { > btrfs_abort_transaction(trans, root, ret); > @@ -1806,9 +1799,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) > btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); > ret = btrfs_commit_transaction(trans, root); > > - if (seeding_dev) { > + if (seed_fs) { > mutex_unlock(&uuid_mutex); > - up_write(&sb->s_umount); > > if (ret) /* transaction commit */ > return ret; > @@ -1837,10 +1829,8 @@ error_trans: > kfree(device); > error: > blkdev_put(bdev, FMODE_EXCL); > - if (seeding_dev) { > + if (seed_fs) > mutex_unlock(&uuid_mutex); > - up_write(&sb->s_umount); > - } > return ret; > } > > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 53c06af..a76d598 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -279,7 +279,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, > struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, > u8 *uuid, u8 *fsid); > int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); > -int btrfs_init_new_device(struct btrfs_root *root, char *path); > +int btrfs_init_new_device(struct btrfs_root *root, const char *path, > + bool seed_fs); > int btrfs_balance(struct btrfs_balance_control *bctl, > struct btrfs_ioctl_balance_args *bargs); > int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html