thr3ads.net - Btrfs devel - [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize [Jul 2010]

If this information is useful, please help other people find it:
Share via:

Miao Xie

2010-Jul-13 08:55 UTC

[PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize

oops occured when we ran the following commands on the filesystem whose
leafsize is greater than its nodesize.
  # mkfs.btrfs -l 8192 /dev/sda1
                  ^^^^ 2 * PAGESIZE
  # mount /dev/sda1 /mnt
  # cat /dev/zero > /mnt/tmp_file0
  # umount /mnt
  Oops occured.
(Sometimes we must do the loop of mount/umount several times to hit this bug)

Oops infomation:
------------[ cut here ]------------
WARNING: at fs/btrfs/extent_io.c:3685 copy_extent_buffer+0x48/0x192()
Hardware name: FFFFFFFFFF
Modules linked in: [snip]
Pid: 3743, comm: umount Not tainted 2.6.35-rc4 #22
Call Trace:
  [<ffffffff8103782e>] warn_slowpath_common+0x80/0x98
  [<ffffffff8103785b>] warn_slowpath_null+0x15/0x17
  [<ffffffff811713a3>] copy_extent_buffer+0x48/0x192
  [<ffffffff81140488>] __btrfs_cow_block+0x16f/0x559
  [<ffffffff81140ee7>] btrfs_cow_block+0x189/0x1a6
  [<ffffffff81157cf0>] commit_cowonly_roots+0x54/0x193
  [<ffffffff811587d5>] btrfs_commit_transaction+0x380/0x610
  [<ffffffff81050a67>] ? autoremove_wake_function+0x0/0x34
  [<ffffffff811554bf>] btrfs_commit_super+0xa2/0xc3
  [<ffffffff8115551a>] close_ctree+0x3a/0x330
  [<ffffffff810545e8>] ? up_write+0x1e/0x36
  [<ffffffff810f4818>] ? invalidate_inodes+0x120/0x132
  [<ffffffff8113b233>] btrfs_put_super+0x18/0x27
  [<ffffffff810e395c>] generic_shutdown_super+0x51/0xd2
  [<ffffffff810e3a28>] kill_anon_super+0x11/0x4f
  [<ffffffff810e2af8>] deactivate_locked_super+0x21/0x41
  [<ffffffff810e2cd0>] deactivate_super+0x40/0x44
  [<ffffffff810f7a4b>] mntput_no_expire+0xb8/0xe5
  [<ffffffff810f7fea>] sys_umount+0x2c8/0x2f3
  [<ffffffff8106062a>] ? trace_hardirqs_on_caller+0x10c/0x130
  [<ffffffff81001f2b>] system_call_fastpath+0x16/0x1b
---[ end trace 5f7f4dbc8bcadbc3 ]---

The reason is that:
In order to reuse the extent buffer, the btrfs doesn''t release the
extent
buffer, When the btrfs releases its device space. So if the btrfs allocates
the device space that has been released just now, the btrfs will get the old
extent buffer mapping to the device space.

But if the length of the new space is greator than the old extent buffer, a
BUG() will be touched.

The patch fixes this problem by change the length of the old extent buffer.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
  fs/btrfs/extent_io.c |   90 +++++++++++++++++++++++++++++++++++++++++++++++--
  1 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 70b7cc5..b4f1c42 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3130,6 +3130,45 @@ static inline void btrfs_release_extent_buffer(struct
extent_buffer *eb)
  	__free_extent_buffer(eb);
  }
  
+/*
+ * we may reuse a extent buffer whose device space has been released, if the
len
+ * of the extent buffer is smaller than we expect, we must enlarge the extent
+ * buffer, and before doing that, we must release the extent buffer that
+ * intersects it.
+ *
+ * Don''t worry about the state of the extent buffer that is going to
be release.
+ * because it is just an image left in the memory, and its device space has
been
+ * released, or the btrfs can''t allocate its device space for other
extent
+ * buffer.
+ *
+ * Note: Must hold io_tree->buffer_lock
+ */
+static int btrfs_enlarge_extent_buffer(struct extent_io_tree *tree,
+					struct extent_buffer *eb,
+					unsigned long newlen)
+{
+	struct rb_node *next;
+	struct extent_buffer *next_eb;
+
+	eb->len = newlen;
+	set_page_extent_head(eb->first_page, newlen);
+
+	next = rb_next(&eb->rb_node);
+	while (next) {
+		next_eb = rb_entry(next, struct extent_buffer, rb_node);
+		if (next_eb->start >= eb->start + eb->len)
+			break;
+
+		if (atomic_read(&next_eb->refs) > 1)
+			return 1;
+
+		next = rb_next(next);
+		rb_erase(&next_eb->rb_node, &tree->buffer);
+		btrfs_release_extent_buffer(next_eb);
+	}
+	return 0;
+}
+
  struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
  					  u64 start, unsigned long len,
  					  struct page *page0,
@@ -3147,10 +3186,49 @@ struct extent_buffer *alloc_extent_buffer(struct
extent_io_tree *tree,
  	spin_lock(&tree->buffer_lock);
  	eb = buffer_search(tree, start);
  	if (eb) {
-		atomic_inc(&eb->refs);
-		spin_unlock(&tree->buffer_lock);
-		mark_page_accessed(eb->first_page);
-		return eb;
+		/*
+		 * If this extent buffer''s device space has been released some
+		 * time ago, and is reallocated again to store other metadata,
+		 * but it hasn''t been release, we may get the old entent buffer
+		 * and reuse it.
+		 *
+		 * But, we must change it according the new len.
+		 */
+		if (eb->len >= len) {
+			if (eb->len > len) {
+				btrfs_release_extent_buffer_page(eb, num_pages);
+
+				eb->len = len;
+				set_page_extent_head(eb->first_page, len);
+			}
+
+			atomic_inc(&eb->refs);
+			spin_unlock(&tree->buffer_lock);
+			mark_page_accessed(eb->first_page);
+			return eb;
+		} else {
+			int ret;
+
+			/*
+			 * if eb->len != len, it means this extent buffer
+			 * is reused as a new extent buffer.
+			 */
+			BUG_ON(atomic_read(&eb->refs) != 1);
+
+			i = num_extent_pages(eb->start, eb->len);
+			index += i;
+
+			ret = btrfs_enlarge_extent_buffer(tree, eb, len);
+			if (ret) {
+				spin_unlock(&tree->buffer_lock);
+				return NULL;
+			} else {
+				rb_erase(&eb->rb_node, &tree->buffer);
+				spin_unlock(&tree->buffer_lock);
+				mark_page_accessed(eb->first_page);
+				goto eb_alloc_pages;
+			}
+		}
  	}
  	spin_unlock(&tree->buffer_lock);
  
@@ -3170,6 +3248,8 @@ struct extent_buffer *alloc_extent_buffer(struct
extent_io_tree *tree,
  	} else {
  		i = 0;
  	}
+
+eb_alloc_pages:
  	for (; i < num_pages; i++, index++) {
  		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
  		if (!p) {
@@ -3197,6 +3277,8 @@ struct extent_buffer *alloc_extent_buffer(struct
extent_io_tree *tree,
  		/* add one reference for the caller */
  		atomic_inc(&exists->refs);
  		spin_unlock(&tree->buffer_lock);
+
+		BUG_ON(exists->len != eb->len);
  		goto free_eb;
  	}
  	/* add one reference for the tree */
-- 
1.7.0.1
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Miao Xie

2010-Jul-14 09:11 UTC

head link

Re: [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize

Hi, Chris

Could you review these patches for me? I have tested them and everything
works ok.

[PATCH 1/2] btrfs: restructure try_release_extent_buffer()
[PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize
[PATCH] btrfs-progs: fix wrong extent buffer size when reading tree block

Thanks
Miao Xie

On Tue Jul 13 2010 16:55:55 GMT+0800 (CST), Miao Xie
wrote:> oops occured when we ran the following commands on the filesystem whose
> leafsize is greater than its nodesize.
> # mkfs.btrfs -l 8192 /dev/sda1
> ^^^^ 2 * PAGESIZE
> # mount /dev/sda1 /mnt
> # cat /dev/zero > /mnt/tmp_file0
> # umount /mnt
> Oops occured.
> (Sometimes we must do the loop of mount/umount several times to hit this
> bug)
>
> Oops infomation:
> ------------[ cut here ]------------
> WARNING: at fs/btrfs/extent_io.c:3685 copy_extent_buffer+0x48/0x192()
> Hardware name: FFFFFFFFFF
> Modules linked in: [snip]
> Pid: 3743, comm: umount Not tainted 2.6.35-rc4 #22
> Call Trace:
> [<ffffffff8103782e>] warn_slowpath_common+0x80/0x98
> [<ffffffff8103785b>] warn_slowpath_null+0x15/0x17
> [<ffffffff811713a3>] copy_extent_buffer+0x48/0x192
> [<ffffffff81140488>] __btrfs_cow_block+0x16f/0x559
> [<ffffffff81140ee7>] btrfs_cow_block+0x189/0x1a6
> [<ffffffff81157cf0>] commit_cowonly_roots+0x54/0x193
> [<ffffffff811587d5>] btrfs_commit_transaction+0x380/0x610
> [<ffffffff81050a67>] ? autoremove_wake_function+0x0/0x34
> [<ffffffff811554bf>] btrfs_commit_super+0xa2/0xc3
> [<ffffffff8115551a>] close_ctree+0x3a/0x330
> [<ffffffff810545e8>] ? up_write+0x1e/0x36
> [<ffffffff810f4818>] ? invalidate_inodes+0x120/0x132
> [<ffffffff8113b233>] btrfs_put_super+0x18/0x27
> [<ffffffff810e395c>] generic_shutdown_super+0x51/0xd2
> [<ffffffff810e3a28>] kill_anon_super+0x11/0x4f
> [<ffffffff810e2af8>] deactivate_locked_super+0x21/0x41
> [<ffffffff810e2cd0>] deactivate_super+0x40/0x44
> [<ffffffff810f7a4b>] mntput_no_expire+0xb8/0xe5
> [<ffffffff810f7fea>] sys_umount+0x2c8/0x2f3
> [<ffffffff8106062a>] ? trace_hardirqs_on_caller+0x10c/0x130
> [<ffffffff81001f2b>] system_call_fastpath+0x16/0x1b
> ---[ end trace 5f7f4dbc8bcadbc3 ]---
>
> The reason is that:
> In order to reuse the extent buffer, the btrfs doesn''t release the
extent
> buffer, When the btrfs releases its device space. So if the btrfs allocates
> the device space that has been released just now, the btrfs will get the
> old
> extent buffer mapping to the device space.
>
> But if the length of the new space is greator than the old extent buffer, a
> BUG() will be touched.
>
> The patch fixes this problem by change the length of the old extent buffer.
>
> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
> ---
> fs/btrfs/extent_io.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++--
> 1 files changed, 86 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 70b7cc5..b4f1c42 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3130,6 +3130,45 @@ static inline void
> btrfs_release_extent_buffer(struct extent_buffer *eb)
> __free_extent_buffer(eb);
> }
>
> +/*
> + * we may reuse a extent buffer whose device space has been released,
> if the len
> + * of the extent buffer is smaller than we expect, we must enlarge the
> extent
> + * buffer, and before doing that, we must release the extent buffer that
> + * intersects it.
> + *
> + * Don''t worry about the state of the extent buffer that is going
to be
> release.
> + * because it is just an image left in the memory, and its device space
> has been
> + * released, or the btrfs can''t allocate its device space for
other extent
> + * buffer.
> + *
> + * Note: Must hold io_tree->buffer_lock
> + */
> +static int btrfs_enlarge_extent_buffer(struct extent_io_tree *tree,
> + struct extent_buffer *eb,
> + unsigned long newlen)
> +{
> + struct rb_node *next;
> + struct extent_buffer *next_eb;
> +
> + eb->len = newlen;
> + set_page_extent_head(eb->first_page, newlen);
> +
> + next = rb_next(&eb->rb_node);
> + while (next) {
> + next_eb = rb_entry(next, struct extent_buffer, rb_node);
> + if (next_eb->start >= eb->start + eb->len)
> + break;
> +
> + if (atomic_read(&next_eb->refs) > 1)
> + return 1;
> +
> + next = rb_next(next);
> + rb_erase(&next_eb->rb_node, &tree->buffer);
> + btrfs_release_extent_buffer(next_eb);
> + }
> + return 0;
> +}
> +
> struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
> u64 start, unsigned long len,
> struct page *page0,
> @@ -3147,10 +3186,49 @@ struct extent_buffer *alloc_extent_buffer(struct
> extent_io_tree *tree,
> spin_lock(&tree->buffer_lock);
> eb = buffer_search(tree, start);
> if (eb) {
> - atomic_inc(&eb->refs);
> - spin_unlock(&tree->buffer_lock);
> - mark_page_accessed(eb->first_page);
> - return eb;
> + /*
> + * If this extent buffer''s device space has been released some
> + * time ago, and is reallocated again to store other metadata,
> + * but it hasn''t been release, we may get the old entent buffer
> + * and reuse it.
> + *
> + * But, we must change it according the new len.
> + */
> + if (eb->len >= len) {
> + if (eb->len > len) {
> + btrfs_release_extent_buffer_page(eb, num_pages);
> +
> + eb->len = len;
> + set_page_extent_head(eb->first_page, len);
> + }
> +
> + atomic_inc(&eb->refs);
> + spin_unlock(&tree->buffer_lock);
> + mark_page_accessed(eb->first_page);
> + return eb;
> + } else {
> + int ret;
> +
> + /*
> + * if eb->len != len, it means this extent buffer
> + * is reused as a new extent buffer.
> + */
> + BUG_ON(atomic_read(&eb->refs) != 1);
> +
> + i = num_extent_pages(eb->start, eb->len);
> + index += i;
> +
> + ret = btrfs_enlarge_extent_buffer(tree, eb, len);
> + if (ret) {
> + spin_unlock(&tree->buffer_lock);
> + return NULL;
> + } else {
> + rb_erase(&eb->rb_node, &tree->buffer);
> + spin_unlock(&tree->buffer_lock);
> + mark_page_accessed(eb->first_page);
> + goto eb_alloc_pages;
> + }
> + }
> }
> spin_unlock(&tree->buffer_lock);
>
> @@ -3170,6 +3248,8 @@ struct extent_buffer *alloc_extent_buffer(struct
> extent_io_tree *tree,
> } else {
> i = 0;
> }
> +
> +eb_alloc_pages:
> for (; i < num_pages; i++, index++) {
> p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
> if (!p) {
> @@ -3197,6 +3277,8 @@ struct extent_buffer *alloc_extent_buffer(struct
> extent_io_tree *tree,
> /* add one reference for the caller */
> atomic_inc(&exists->refs);
> spin_unlock(&tree->buffer_lock);
> +
> + BUG_ON(exists->len != eb->len);
> goto free_eb;
> }
> /* add one reference for the tree */
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Btrfs devel - Jul 2010 - [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize

[PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize

Re: [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize