Balance will create reloc_root for each fs root, and it''s going to record last_snapshot to filter shared blocks. The side effect of setting last_snapshot is to break nocow attributes of files. So it turns out that checking last_snapshot does not always ensure that a node/leaf/file_extent is shared. That''s why shared node/leaf needs to search extent tree for number of references even after having checked last_snapshot, and updating fs/file tree works top-down so the children will always know how many references parents put on them at the moment of checking shared status. However, our nocow path does something different, it''ll firstly check if the file extent is shared, then update fs/file tree by updating inode. This ends up that the related extent record to the file extent may don''t have actual multiple references when checking shared status. ------------------------------------------------ fs_root snap \ / leaf ==> refs=2 | file_extent ==> refs=1(but actually refs is 2) After updating fs tree(or snapshot if snapshot is not RO), it''ll be fs root snap \ / cow leaf \ / file_extent ==> refs=2(we do have two parents) ------------------------------------------------ So it''ll be confused by last_snapshot from balance to think that the file extent is now shared. There are actually a couple of ways to address it, but updating fs/file tree firstly might be the easiest and cleanest one. With this, updating fs/file tree will at least make a delayed ref if the file extent is really shared by several parents, we can make nocow happy again without having to check confusing last_snapshot. Reported-by: Kyle Gates <kylegates@hotmail.com> Signed-off-by: Liu Bo <bo.li.liu@oracle.com> --- fs/btrfs/extent-tree.c | 4 ---- fs/btrfs/inode.c | 2 +- 2 files changed, 1 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df472ab..d24c26c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2856,10 +2856,6 @@ static noinline int check_committed_ref(struct btrfs_trans_handle *trans, btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) goto out; - if (btrfs_extent_generation(leaf, ei) <- btrfs_root_last_snapshot(&root->root_item)) - goto out; - iref = (struct btrfs_extent_inline_ref *)(ei + 1); if (btrfs_extent_inline_ref_type(leaf, iref) ! BTRFS_EXTENT_DATA_REF_KEY) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 23c596c..0dc5c7d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1253,7 +1253,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, cur_offset = start; while (1) { ret = btrfs_lookup_file_extent(trans, root, path, ino, - cur_offset, 0); + cur_offset, 1); if (ret < 0) { btrfs_abort_transaction(trans, root, ret); goto error; -- 1.7.7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Balance will create reloc_root for each fs root, and it''s going to record last_snapshot to filter shared blocks. The side effect of setting last_snapshot is to break nocow attributes of files. Since the extents are not shared by the relocation tree after the balance, we can recover the old last_snapshot safely if no one snapshoted the source tree. We fix the above problem by this way. Reported-by: Kyle Gates <kylegates@hotmail.com> Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> --- fs/btrfs/relocation.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 395b820..934ffe6 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1305,6 +1305,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, struct extent_buffer *eb; struct btrfs_root_item *root_item; struct btrfs_key root_key; + u64 last_snap = 0; int ret; root_item = kmalloc(sizeof(*root_item), GFP_NOFS); @@ -1320,6 +1321,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, BTRFS_TREE_RELOC_OBJECTID); BUG_ON(ret); + last_snap = btrfs_root_last_snapshot(&root->root_item); btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); } else { @@ -1345,6 +1347,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); root_item->drop_level = 0; + /* + * abuse rtransid, it is safe because it is impossible to + * receive data into a relocation tree. + */ + btrfs_set_root_rtransid(root_item, last_snap); + btrfs_set_root_otransid(root_item, trans->transid); } btrfs_tree_unlock(eb); @@ -2273,8 +2281,12 @@ void free_reloc_roots(struct list_head *list) static noinline_for_stack int merge_reloc_roots(struct reloc_control *rc) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_root *reloc_root; + u64 last_snap; + u64 otransid; + u64 objectid; LIST_HEAD(reloc_roots); int found = 0; int ret = 0; @@ -2308,12 +2320,44 @@ again: } else { list_del_init(&reloc_root->root_list); } + + /* + * we keep the old last snapshod transid in rtranid when we + * created the relocation tree. + */ + last_snap = btrfs_root_rtransid(&reloc_root->root_item); + otransid = btrfs_root_otransid(&reloc_root->root_item); + objectid = reloc_root->root_key.offset; + ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); if (ret < 0) { if (list_empty(&reloc_root->root_list)) list_add_tail(&reloc_root->root_list, &reloc_roots); goto out; + } else if (!ret) { + /* + * recover the last snapshot tranid to avoid + * the space balance break NOCOW. + */ + root = read_fs_root(rc->extent_root->fs_info, + objectid); + if (IS_ERR(root)) + continue; + + if (btrfs_root_refs(&root->root_item) == 0) + continue; + + trans = btrfs_join_transaction(root); + BUG_ON(IS_ERR(trans)); + + /* Check if the fs/file tree was snapshoted or not. */ + if (btrfs_root_last_snapshot(&root->root_item) =+ otransid - 1) + btrfs_set_root_last_snapshot(&root->root_item, + last_snap); + + btrfs_end_transaction(trans, root); } } -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Monday, June 03, 2013, Liu Bo wrote:> Balance will create reloc_root for each fs root, and it''s going to > record last_snapshot to filter shared blocks. The side effect of > setting last_snapshot is to break nocow attributes of files. > > So it turns out that checking last_snapshot does not always ensure that > a node/leaf/file_extent is shared. > > That''s why shared node/leaf needs to search extent tree for number of > references > even after having checked last_snapshot, and updating fs/file tree works > top-down so the children will always know how many references parents put > on them at the moment of checking shared status. > > However, our nocow path does something different, it''ll firstly check > if the file extent is shared, then update fs/file tree by updating inode. > This ends up that the related extent record to the file extent may don''t > have actual multiple references when checking shared status. > > ------------------------------------------------ > fs_root snap > \ / > leaf ==> refs=2 > | > file_extent ==> refs=1(but actually refs is 2) > > After updating fs tree(or snapshot if snapshot is not RO), it''ll be > > fs root snap > \ / > cow leaf > \ / > file_extent ==> refs=2(we do have two parents) > ------------------------------------------------ > > So it''ll be confused by last_snapshot from balance to think that the file > extent is now shared. > > There are actually a couple of ways to address it, but updating fs/file > tree > firstly might be the easiest and cleanest one. With this, updating > fs/file > tree will at least make a delayed ref if the file extent is really shared > by > several parents, we can make nocow happy again without having to check > confusing > last_snapshot.Works here. Extents are stable after a balance. Thanks, Kyle Tested-by: Kyle Gates <kylegates@hotmail.com>> > Reported-by: Kyle Gates <kylegates@hotmail.com> > Signed-off-by: Liu Bo <bo.li.liu@oracle.com> > --- > fs/btrfs/extent-tree.c | 4 ---- > fs/btrfs/inode.c | 2 +- > 2 files changed, 1 insertions(+), 5 deletions(-) > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index df472ab..d24c26c 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -2856,10 +2856,6 @@ static noinline int check_committed_ref(struct > btrfs_trans_handle *trans, > btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) > goto out; > > - if (btrfs_extent_generation(leaf, ei) <> - btrfs_root_last_snapshot(&root->root_item)) > - goto out; > - > iref = (struct btrfs_extent_inline_ref *)(ei + 1); > if (btrfs_extent_inline_ref_type(leaf, iref) !> BTRFS_EXTENT_DATA_REF_KEY) > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 23c596c..0dc5c7d 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -1253,7 +1253,7 @@ static noinline int run_delalloc_nocow(struct inode > *inode, > cur_offset = start; > while (1) { > ret = btrfs_lookup_file_extent(trans, root, path, ino, > - cur_offset, 0); > + cur_offset, 1); > if (ret < 0) { > btrfs_abort_transaction(trans, root, ret); > goto error; > -- > 1.7.7 > >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wednesday, June 05, 2013 Miao Xie wrote:> Balance will create reloc_root for each fs root, and it''s going to > record last_snapshot to filter shared blocks. The side effect of > setting last_snapshot is to break nocow attributes of files. > > Since the extents are not shared by the relocation tree after the balance, > we can recover the old last_snapshot safely if no one snapshoted the > source tree. We fix the above problem by this way.This patch also fixed my problem. I tend to like this patch better as the fix lands on disk allowing nocow to function with an older kernel after being balanced. Thanks, Kyle Tested-by: Kyle Gates <kylegates@hotmail.com>> Reported-by: Kyle Gates <kylegates@hotmail.com> > Signed-off-by: Liu Bo <bo.li.liu@oracle.com> > Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> > --- > fs/btrfs/relocation.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 44 insertions(+) > > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c > index 395b820..934ffe6 100644 > --- a/fs/btrfs/relocation.c > +++ b/fs/btrfs/relocation.c > @@ -1305,6 +1305,7 @@ static struct btrfs_root *create_reloc_root(struct > btrfs_trans_handle *trans, > struct extent_buffer *eb; > struct btrfs_root_item *root_item; > struct btrfs_key root_key; > + u64 last_snap = 0; > int ret; > > root_item = kmalloc(sizeof(*root_item), GFP_NOFS); > @@ -1320,6 +1321,7 @@ static struct btrfs_root *create_reloc_root(struct > btrfs_trans_handle *trans, > BTRFS_TREE_RELOC_OBJECTID); > BUG_ON(ret); > > + last_snap = btrfs_root_last_snapshot(&root->root_item); > btrfs_set_root_last_snapshot(&root->root_item, > trans->transid - 1); > } else { > @@ -1345,6 +1347,12 @@ static struct btrfs_root *create_reloc_root(struct > btrfs_trans_handle *trans, > memset(&root_item->drop_progress, 0, > sizeof(struct btrfs_disk_key)); > root_item->drop_level = 0; > + /* > + * abuse rtransid, it is safe because it is impossible to > + * receive data into a relocation tree. > + */ > + btrfs_set_root_rtransid(root_item, last_snap); > + btrfs_set_root_otransid(root_item, trans->transid); > } > > btrfs_tree_unlock(eb); > @@ -2273,8 +2281,12 @@ void free_reloc_roots(struct list_head *list) > static noinline_for_stack > int merge_reloc_roots(struct reloc_control *rc) > { > + struct btrfs_trans_handle *trans; > struct btrfs_root *root; > struct btrfs_root *reloc_root; > + u64 last_snap; > + u64 otransid; > + u64 objectid; > LIST_HEAD(reloc_roots); > int found = 0; > int ret = 0; > @@ -2308,12 +2320,44 @@ again: > } else { > list_del_init(&reloc_root->root_list); > } > + > + /* > + * we keep the old last snapshod transid in rtranid when we > + * created the relocation tree. > + */ > + last_snap = btrfs_root_rtransid(&reloc_root->root_item); > + otransid = btrfs_root_otransid(&reloc_root->root_item); > + objectid = reloc_root->root_key.offset; > + > ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); > if (ret < 0) { > if (list_empty(&reloc_root->root_list)) > list_add_tail(&reloc_root->root_list, > &reloc_roots); > goto out; > + } else if (!ret) { > + /* > + * recover the last snapshot tranid to avoid > + * the space balance break NOCOW. > + */ > + root = read_fs_root(rc->extent_root->fs_info, > + objectid); > + if (IS_ERR(root)) > + continue; > + > + if (btrfs_root_refs(&root->root_item) == 0) > + continue; > + > + trans = btrfs_join_transaction(root); > + BUG_ON(IS_ERR(trans)); > + > + /* Check if the fs/file tree was snapshoted or not. */ > + if (btrfs_root_last_snapshot(&root->root_item) => + otransid - 1) > + btrfs_set_root_last_snapshot(&root->root_item, > + last_snap); > + > + btrfs_end_transaction(trans, root); > } > } > > -- > 1.8.1.4 > >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html