First two patches are trivial cleanups that I''ve spotted while reading send.c, can be applied independently. The third patch contains the important bits, safety checks that the subvolumes involved in send do not accidentally lose the RO status. I haven''t seen this documented anywhere that this is mandatory, implied from how I assume send works. Please let me know if this is wrong. David Sterba (3): btrfs: send: clean up dead code btrfs: remove unused mnt from send_ctx btrfs: Check read-only status of roots during send fs/btrfs/ctree.h | 6 +++ fs/btrfs/ioctl.c | 22 +++++++++- fs/btrfs/send.c | 116 +++++++++++++++++++++++++++++------------------------- 3 files changed, 87 insertions(+), 57 deletions(-) -- 1.7.9 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Remove ifdefed code: - tlv_put for 8, 16 and 32, add a generic tempalte if needed in future - tlv_put_timespec - the btrfs_timespec fields are used - fs_path_remove obsoleted long ago Signed-off-by: David Sterba <dsterba@suse.cz> --- fs/btrfs/send.c | 58 +++++++----------------------------------------------- 1 files changed, 8 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6837fe87f3a6..2c1dcab7420d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -336,16 +336,6 @@ out: return ret; } -#if 0 -static void fs_path_remove(struct fs_path *p) -{ - BUG_ON(p->reversed); - while (p->start != p->end && *p->end != ''/'') - p->end--; - *p->end = 0; -} -#endif - static int fs_path_copy(struct fs_path *p, struct fs_path *from) { int ret; @@ -436,30 +426,15 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) return 0; } -#if 0 -static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value) -{ - return tlv_put(sctx, attr, &value, sizeof(value)); -} - -static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value) -{ - __le16 tmp = cpu_to_le16(value); - return tlv_put(sctx, attr, &tmp, sizeof(tmp)); -} - -static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value) -{ - __le32 tmp = cpu_to_le32(value); - return tlv_put(sctx, attr, &tmp, sizeof(tmp)); -} -#endif +#define TLV_PUT_DEFINE_INT(bits) \ + static int tlv_put_u##bits(struct send_ctx *sctx, \ + u##bits attr, u##bits value) \ + { \ + __le##bits __tmp = cpu_to_le##bits(value); \ + return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \ + } -static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value) -{ - __le64 tmp = cpu_to_le64(value); - return tlv_put(sctx, attr, &tmp, sizeof(tmp)); -} +TLV_PUT_DEFINE_INT(64) static int tlv_put_string(struct send_ctx *sctx, u16 attr, const char *str, int len) @@ -475,17 +450,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); } -#if 0 -static int tlv_put_timespec(struct send_ctx *sctx, u16 attr, - struct timespec *ts) -{ - struct btrfs_timespec bts; - bts.sec = cpu_to_le64(ts->tv_sec); - bts.nsec = cpu_to_le32(ts->tv_nsec); - return tlv_put(sctx, attr, &bts, sizeof(bts)); -} -#endif - static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, struct extent_buffer *eb, struct btrfs_timespec *ts) @@ -533,12 +497,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, if (ret < 0) \ goto tlv_put_failure; \ } while (0) -#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \ - do { \ - ret = tlv_put_timespec(sctx, attrtype, ts); \ - if (ret < 0) \ - goto tlv_put_failure; \ - } while (0) #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ do { \ ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ -- 1.7.9 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Unused since ed2590953bd06b892f0411fc94e19175d32f197a "Btrfs: stop using vfs_read in send". Signed-off-by: David Sterba <dsterba@suse.cz> --- fs/btrfs/send.c | 4 ---- 1 files changed, 0 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2c1dcab7420d..95f0fae4ce59 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -88,8 +88,6 @@ struct send_ctx { u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; u64 flags; /* ''flags'' member of btrfs_ioctl_send_args is u64 */ - struct vfsmount *mnt; - struct btrfs_root *send_root; struct btrfs_root *parent_root; struct clone_root *clone_roots; @@ -4711,8 +4709,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } - sctx->mnt = mnt_file->f_path.mnt; - sctx->send_root = send_root; sctx->clone_roots_cnt = arg->clone_sources_count; -- 1.7.9 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Sterba
2013-Dec-16 16:34 UTC
[PATCH 3/3] btrfs: Check read-only status of roots during send
All the subvolues that are involved in send must be read-only during the whole operation. The ioctl SUBVOL_SETFLAGS could be used to change the status to read-write and the result of send stream is undefined if the data change unexpectedly. Fix that by adding a refcount for all involved roots and verify that there''s no send in progress during SUBVOL_SETFLAGS ioctl call that does read-only -> read-write transition. We need refcounts because there are no restrictions on number of send parallel operations currently run on a single subvolume, be it source, parent or one of the multiple clone sources. Kernel is silent when the RO checks fail and returns EPERM. The same set of checks is done already in userspace before send starts. Signed-off-by: David Sterba <dsterba@suse.cz> --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/ioctl.c | 22 +++++++++++++++++++--- fs/btrfs/send.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54ab86127f7a..302753cd51cf 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1795,6 +1795,12 @@ struct btrfs_root { struct list_head ordered_extents; struct list_head ordered_root; u64 nr_ordered_extents; + + /* + * Number of currently running SEND ioctls to prevent + * manipulation with the read-only status via SUBVOL_SETFLAGS + */ + int send_in_progress; }; struct btrfs_ioctl_defrag_range_args { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a111622598b0..158680a0f94e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1698,12 +1698,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, goto out_drop_sem; root_flags = btrfs_root_flags(&root->root_item); - if (flags & BTRFS_SUBVOL_RDONLY) + if (flags & BTRFS_SUBVOL_RDONLY) { btrfs_set_root_flags(&root->root_item, root_flags | BTRFS_ROOT_SUBVOL_RDONLY); - else - btrfs_set_root_flags(&root->root_item, + } else { + /* + * Block RO -> RW transition if this subvolume is involved in + * send + */ + spin_lock(&root->root_item_lock); + if (root->send_in_progress == 0) { + btrfs_set_root_flags(&root->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); + spin_unlock(&root->root_item_lock); + } else { + spin_unlock(&root->root_item_lock); + btrfs_warn(root->fs_info, + "Attempt to set subvolume %llu read-write during send", + root->root_key.objectid); + ret = -EPERM; + goto out_drop_sem; + } + } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 95f0fae4ce59..468eba26ad8c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4629,6 +4629,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) struct send_ctx *sctx = NULL; u32 i; u64 *clone_sources_tmp = NULL; + int clone_sources_to_rollback = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4637,6 +4638,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) fs_info = send_root->fs_info; /* + * The subvolume must remain read-only during send, protect against + * making it RW. + */ + spin_lock(&send_root->root_item_lock); + send_root->send_in_progress++; + spin_unlock(&send_root->root_item_lock); + + /* * This is done when we lookup the root, it should already be complete * by the time we get here. */ @@ -4671,6 +4680,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) up_read(&send_root->fs_info->extent_commit_sem); } + /* + * Userspace tools do the checks and warn the user if it''s + * not RO. + */ + if (!btrfs_root_readonly(send_root)) { + ret = -EPERM; + goto out; + } + arg = memdup_user(arg_, sizeof(*arg)); if (IS_ERR(arg)) { ret = PTR_ERR(arg); @@ -4757,6 +4775,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ret = PTR_ERR(clone_root); goto out; } + clone_sources_to_rollback = i + 1; + spin_lock(&clone_root->root_item_lock); + clone_root->send_in_progress++; + if (!btrfs_root_readonly(clone_root)) { + spin_unlock(&clone_root->root_item_lock); + ret = -EPERM; + goto out; + } + spin_unlock(&clone_root->root_item_lock); sctx->clone_roots[i].root = clone_root; } vfree(clone_sources_tmp); @@ -4772,6 +4799,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ret = PTR_ERR(sctx->parent_root); goto out; } + spin_lock(&sctx->parent_root->root_item_lock); + sctx->parent_root->send_in_progress++; + if (!btrfs_root_readonly(sctx->parent_root)) { + spin_unlock(&sctx->parent_root->root_item_lock); + ret = -EPERM; + goto out; + } + spin_unlock(&sctx->parent_root->root_item_lock); } /* @@ -4800,6 +4835,25 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } out: + for (i = 0; i < clone_sources_to_rollback; i++) { + struct btrfs_root *r = sctx->clone_roots[i].root; + + spin_lock(&r->root_item_lock); + r->send_in_progress--; + spin_unlock(&r->root_item_lock); + } + if (!IS_ERR(sctx->parent_root)) { + struct btrfs_root *r = sctx->parent_root; + + spin_lock(&r->root_item_lock); + r->send_in_progress--; + spin_unlock(&r->root_item_lock); + } + + spin_lock(&send_root->root_item_lock); + send_root->send_in_progress--; + spin_unlock(&send_root->root_item_lock); + kfree(arg); vfree(clone_sources_tmp); -- 1.7.9 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Wang Shilong
2013-Dec-17 11:58 UTC
Re: [PATCH 3/3] btrfs: Check read-only status of roots during send
Hello David, Nice work, Before this patch for btrfs send. we have to join a transaction to avoid commit root changed. I send a plus patch that remove transaction protection from btrfs send. and a little comment below. [...] On 12/17/2013 12:34 AM, David Sterba wrote:> All the subvolues that are involved in send must be read-only during the > s via SUBVOL_SETFLAGS > + */ > + int send_in_progress;Why not use u32 here? Thanks, Wang -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Sterba
2013-Dec-17 13:09 UTC
Re: [PATCH 3/3] btrfs: Check read-only status of roots during send
On Tue, Dec 17, 2013 at 07:58:24PM +0800, Wang Shilong wrote:> Nice work, Before this patch for btrfs send. > we have to join a transaction to avoid commit root changed.That sounds like a good improvement.> I send a plus patch that remove transaction protection from btrfs send. > and a little comment below. > > [...] > On 12/17/2013 12:34 AM, David Sterba wrote: > >All the subvolues that are involved in send must be read-only during the > > s via SUBVOL_SETFLAGS > >+ */ > >+ int send_in_progress; > > Why not use u32 here?The int type should be enough to hold refs for all running sends, if this is your concern. I thought it''s a refcount, it should not go below 0 but if it does, then it should be caught. I''ll update the patch to check if send_in_progress is not negative after the decrements. thanks, david -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Subject: [PATCH 4/3] btrfs: check balance of send_in_progress Warn if the balance goes below zero, which appears to be unlikely though. Otherwise cleans up the code a bit. Signed-off-by: David Sterba <dsterba@suse.cz> --- A followup to 3/3 that adds the check if send_in_progress is not going below zero. It''s a separate patch rather than folded into 3/3 so the change is clearly visible. I''m not convinced that it''s necessary to be that cautious because it looks almost impossible to happen, but on the other hand we''d never know that it happened. fs/btrfs/send.c | 38 ++++++++++++++++++++------------------ 1 files changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 468eba26ad8c..46ea0cdfb88b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4618,6 +4618,21 @@ out: return ret; } +static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) +{ + spin_lock(&root->root_item_lock); + root->send_in_progress--; + /* + * Not much left to do, we don''t know why it''s unbalanced and + * can''t blindly reset it to 0. + */ + if (root->send_in_progress < 0) + btrfs_err(root->fs_info, + "send_in_progres unbalanced %d root %llu\n", + root->send_in_progress, root->root_key.objectid); + spin_unlock(&root->root_item_lock); +} + long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) { int ret = 0; @@ -4835,24 +4850,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } out: - for (i = 0; i < clone_sources_to_rollback; i++) { - struct btrfs_root *r = sctx->clone_roots[i].root; - - spin_lock(&r->root_item_lock); - r->send_in_progress--; - spin_unlock(&r->root_item_lock); - } - if (!IS_ERR(sctx->parent_root)) { - struct btrfs_root *r = sctx->parent_root; - - spin_lock(&r->root_item_lock); - r->send_in_progress--; - spin_unlock(&r->root_item_lock); - } - - spin_lock(&send_root->root_item_lock); - send_root->send_in_progress--; - spin_unlock(&send_root->root_item_lock); + for (i = 0; i < clone_sources_to_rollback; i++) + btrfs_root_dec_send_in_progress(sctx->clone_roots[i].root); + if (!IS_ERR(sctx->parent_root)) + btrfs_root_dec_send_in_progress(sctx->parent_root); + btrfs_root_dec_send_in_progress(send_root); kfree(arg); vfree(clone_sources_tmp); -- 1.7.9 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Possibly Parallel Threads
- [PATCH] btrfs: add "no file data" flag to btrfs send ioctl
- [PATCH] Btrfs: fix estale with btrfs send
- [PATCH] Btrfs: incompatible format change to remove hole extents V4
- [PATCH] Btrfs: stop using vfs_read in send
- [Request for review] [RFC] Add label support for snapshots and subvols