Li Zefan
2012-Mar-12 08:39 UTC
[PATCH 1/2] Btrfs: make clear_extent_bit() always return 0 on success
Currently it returns a set of bits that were cleared, but this return value is not used at all. Moreover it doesn''t seem to be useful, because we may clear the bits of a few extent_states, but only the cleared bits of last one is returned. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> --- fs/btrfs/extent_io.c | 19 +++++++------------ 1 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a55fbe6..c968c95 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -394,18 +394,16 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, /* * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). + * it will optionally wake up any one waiting on this state (wake == 1) * * If no bits are set on the state struct after clearing things, the * struct is freed and removed from the tree */ -static int clear_state_bit(struct extent_io_tree *tree, +static void clear_state_bit(struct extent_io_tree *tree, struct extent_state *state, int *bits, int wake) { int bits_to_clear = *bits & ~EXTENT_CTLBITS; - int ret = state->state & bits_to_clear; if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; @@ -427,7 +425,6 @@ static int clear_state_bit(struct extent_io_tree *tree, } else { merge_state(tree, state); } - return ret; } static struct extent_state * @@ -449,8 +446,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) * * the range [start, end] is inclusive. * - * This takes the tree lock, and returns < 0 on error, > 0 if any of the - * bits were already set, or zero if none of the bits were already set. + * This takes the tree lock, and returns < 0 on error. */ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int wake, int delete, @@ -464,7 +460,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct rb_node *node; u64 last_end; int err; - int set = 0; int clear = 0; if (delete) @@ -547,7 +542,7 @@ hit_next: if (err) goto out; if (state->end <= end) { - set |= clear_state_bit(tree, state, &bits, wake); + clear_state_bit(tree, state, &bits, wake); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -568,13 +563,13 @@ hit_next: if (wake) wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, &bits, wake); + clear_state_bit(tree, prealloc, &bits, wake); prealloc = NULL; goto out; } - set |= clear_state_bit(tree, state, &bits, wake); + clear_state_bit(tree, state, &bits, wake); next: if (last_end == (u64)-1) goto out; @@ -591,7 +586,7 @@ out: if (prealloc) free_extent_state(prealloc); - return set; + return 0; search_again: if (start > end) -- 1.7.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Li Zefan
2012-Mar-12 08:39 UTC
[PATCH 2/2] Btrfs: avoid possible use-after-free in clear_extent_bit()
clear_extent_bit() { next_node = rb_next(&state->rb_node); ... clear_state_bit(state); <-- this may free next_node if (next_node) { state = rb_entry(next_node); ... } } clear_state_bit() calls merge_state() which may free the next node of the passing extent_state, so clear_extent_bit() may end up referencing freed memory. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> --- 经过测试,发现clear_state_bit()会在80%的情况下直接把state free掉。所以之前 的比较简单的patch,会在80%的情况下重新goto回去搜索rbtree,会很慢。所以,现在 改成由clear_state_bit()返回下一个state。 --- fs/btrfs/extent_io.c | 36 +++++++++++++++++++++--------------- 1 files changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c968c95..20f2f5a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -392,6 +392,15 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, return 0; } +static struct extent_state *next_state(struct extent_state *state) +{ + struct rb_node *next = rb_next(&state->rb_node); + if (next) + return rb_entry(next, struct extent_state, rb_node); + else + return NULL; +} + /* * utility function to clear some bits in an extent state struct. * it will optionally wake up any one waiting on this state (wake == 1) @@ -399,10 +408,11 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, * If no bits are set on the state struct after clearing things, the * struct is freed and removed from the tree */ -static void clear_state_bit(struct extent_io_tree *tree, - struct extent_state *state, - int *bits, int wake) +static struct extent_state *clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, + int *bits, int wake) { + struct extent_state *next; int bits_to_clear = *bits & ~EXTENT_CTLBITS; if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { @@ -415,6 +425,7 @@ static void clear_state_bit(struct extent_io_tree *tree, if (wake) wake_up(&state->wq); if (state->state == 0) { + next = next_state(state); if (state->tree) { rb_erase(&state->rb_node, &tree->state); state->tree = NULL; @@ -424,7 +435,9 @@ static void clear_state_bit(struct extent_io_tree *tree, } } else { merge_state(tree, state); + next = next_state(state); } + return next; } static struct extent_state * @@ -456,7 +469,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *cached; struct extent_state *prealloc = NULL; - struct rb_node *next_node; struct rb_node *node; u64 last_end; int err; @@ -508,14 +520,11 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; - if (state->end < end && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - /* the state doesn''t have the wanted bits, go ahead */ - if (!(state->state & bits)) + if (!(state->state & bits)) { + state = next_state(state); goto next; + } /* * | ---- desired range ---- | @@ -569,16 +578,13 @@ hit_next: goto out; } - clear_state_bit(tree, state, &bits, wake); + state = clear_state_bit(tree, state, &bits, wake); next: if (last_end == (u64)-1) goto out; start = last_end + 1; - if (start <= end && next_node) { - state = rb_entry(next_node, struct extent_state, - rb_node); + if (start <= end && state && !need_resched()) goto hit_next; - } goto search_again; out: -- 1.7.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Li Zefan
2012-Mar-12 09:07 UTC
[PATCH 2/2][RESEND] Btrfs: avoid possible use-after-free in clear_extent_bit()
clear_extent_bit() { next_node = rb_next(&state->rb_node); ... clear_state_bit(state); <-- this may free next_node if (next_node) { state = rb_entry(next_node); ... } } clear_state_bit() calls merge_state() which may free the next node of the passing extent_state, so clear_extent_bit() may end up referencing freed memory. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> --- no more Chinese characters in this section. ;) --- fs/btrfs/extent_io.c | 36 +++++++++++++++++++++--------------- 1 files changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c968c95..20f2f5a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -392,6 +392,15 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, return 0; } +static struct extent_state *next_state(struct extent_state *state) +{ + struct rb_node *next = rb_next(&state->rb_node); + if (next) + return rb_entry(next, struct extent_state, rb_node); + else + return NULL; +} + /* * utility function to clear some bits in an extent state struct. * it will optionally wake up any one waiting on this state (wake == 1) @@ -399,10 +408,11 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, * If no bits are set on the state struct after clearing things, the * struct is freed and removed from the tree */ -static void clear_state_bit(struct extent_io_tree *tree, - struct extent_state *state, - int *bits, int wake) +static struct extent_state *clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, + int *bits, int wake) { + struct extent_state *next; int bits_to_clear = *bits & ~EXTENT_CTLBITS; if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { @@ -415,6 +425,7 @@ static void clear_state_bit(struct extent_io_tree *tree, if (wake) wake_up(&state->wq); if (state->state == 0) { + next = next_state(state); if (state->tree) { rb_erase(&state->rb_node, &tree->state); state->tree = NULL; @@ -424,7 +435,9 @@ static void clear_state_bit(struct extent_io_tree *tree, } } else { merge_state(tree, state); + next = next_state(state); } + return next; } static struct extent_state * @@ -456,7 +469,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *cached; struct extent_state *prealloc = NULL; - struct rb_node *next_node; struct rb_node *node; u64 last_end; int err; @@ -508,14 +520,11 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; - if (state->end < end && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - /* the state doesn''t have the wanted bits, go ahead */ - if (!(state->state & bits)) + if (!(state->state & bits)) { + state = next_state(state); goto next; + } /* * | ---- desired range ---- | @@ -569,16 +578,13 @@ hit_next: goto out; } - clear_state_bit(tree, state, &bits, wake); + state = clear_state_bit(tree, state, &bits, wake); next: if (last_end == (u64)-1) goto out; start = last_end + 1; - if (start <= end && next_node) { - state = rb_entry(next_node, struct extent_state, - rb_node); + if (start <= end && state && !need_resched()) goto hit_next; - } goto search_again; out: -- 1.7.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Sterba
2012-Apr-05 16:52 UTC
Re: [PATCH 1/2] Btrfs: make clear_extent_bit() always return 0 on success
On Mon, Mar 12, 2012 at 04:39:28PM +0800, Li Zefan wrote:> Currently it returns a set of bits that were cleared, but this return > value is not used at all. > > Moreover it doesn''t seem to be useful, because we may clear the bits > of a few extent_states, but only the cleared bits of last one is > returned. > > Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> > --- > fs/btrfs/extent_io.c | 19 +++++++------------ > 1 files changed, 7 insertions(+), 12 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index a55fbe6..c968c95 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -394,18 +394,16 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, > > /* > * utility function to clear some bits in an extent state struct. > - * it will optionally wake up any one waiting on this state (wake == 1), or > - * forcibly remove the state from the tree (delete == 1). > + * it will optionally wake up any one waiting on this state (wake == 1) > * > * If no bits are set on the state struct after clearing things, the > * struct is freed and removed from the tree > */ > -static int clear_state_bit(struct extent_io_tree *tree, > +static void clear_state_bit(struct extent_io_tree *tree, > struct extent_state *state, > int *bits, int wake) > { > int bits_to_clear = *bits & ~EXTENT_CTLBITS; > - int ret = state->state & bits_to_clear; > > if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { > u64 range = state->end - state->start + 1; > @@ -427,7 +425,6 @@ static int clear_state_bit(struct extent_io_tree *tree, > } else { > merge_state(tree, state); > } > - return ret; > } > > static struct extent_state *The above part of the patch still applies and with only subject change to something like Btrfs: retrurn void from clear_state_bit is a rc2 material. So, Li, if you''re ok with this change I''m adding it (with the 2/2 patch) to my local queue of rc patches for Chris. david (the rest of the patch was done within the error handling series)> @@ -449,8 +446,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) > * > * the range [start, end] is inclusive. > * > - * This takes the tree lock, and returns < 0 on error, > 0 if any of the > - * bits were already set, or zero if none of the bits were already set. > + * This takes the tree lock, and returns < 0 on error. > */ > int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, > int bits, int wake, int delete, > @@ -464,7 +460,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, > struct rb_node *node; > u64 last_end; > int err; > - int set = 0; > int clear = 0; > > if (delete) > @@ -547,7 +542,7 @@ hit_next: > if (err) > goto out; > if (state->end <= end) { > - set |= clear_state_bit(tree, state, &bits, wake); > + clear_state_bit(tree, state, &bits, wake); > if (last_end == (u64)-1) > goto out; > start = last_end + 1; > @@ -568,13 +563,13 @@ hit_next: > if (wake) > wake_up(&state->wq); > > - set |= clear_state_bit(tree, prealloc, &bits, wake); > + clear_state_bit(tree, prealloc, &bits, wake); > > prealloc = NULL; > goto out; > } > > - set |= clear_state_bit(tree, state, &bits, wake); > + clear_state_bit(tree, state, &bits, wake); > next: > if (last_end == (u64)-1) > goto out; > @@ -591,7 +586,7 @@ out: > if (prealloc) > free_extent_state(prealloc); > > - return set; > + return 0; > > search_again: > if (start > end) > -- 1.7.3.1 > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Li Zefan
2012-Apr-06 00:24 UTC
Re: [PATCH 1/2] Btrfs: make clear_extent_bit() always return 0 on success
(Note: I''ve changed my email address ;) David Sterba wrote:> On Mon, Mar 12, 2012 at 04:39:28PM +0800, Li Zefan wrote: >> Currently it returns a set of bits that were cleared, but this return >> value is not used at all. >> >> Moreover it doesn''t seem to be useful, because we may clear the bits >> of a few extent_states, but only the cleared bits of last one is >> returned. >> >> Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> >> --- >> fs/btrfs/extent_io.c | 19 +++++++------------ >> 1 files changed, 7 insertions(+), 12 deletions(-) >> >> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c >> index a55fbe6..c968c95 100644 >> --- a/fs/btrfs/extent_io.c >> +++ b/fs/btrfs/extent_io.c >> @@ -394,18 +394,16 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, >> >> /* >> * utility function to clear some bits in an extent state struct. >> - * it will optionally wake up any one waiting on this state (wake == 1), or >> - * forcibly remove the state from the tree (delete == 1). >> + * it will optionally wake up any one waiting on this state (wake == 1) >> * >> * If no bits are set on the state struct after clearing things, the >> * struct is freed and removed from the tree >> */ >> -static int clear_state_bit(struct extent_io_tree *tree, >> +static void clear_state_bit(struct extent_io_tree *tree, >> struct extent_state *state, >> int *bits, int wake) >> { >> int bits_to_clear = *bits & ~EXTENT_CTLBITS; >> - int ret = state->state & bits_to_clear; >> >> if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { >> u64 range = state->end - state->start + 1; >> @@ -427,7 +425,6 @@ static int clear_state_bit(struct extent_io_tree *tree, >> } else { >> merge_state(tree, state); >> } >> - return ret; >> } >> >> static struct extent_state * > > The above part of the patch still applies and with only subject change > to something like > > Btrfs: retrurn void from clear_state_bit > > is a rc2 material. So, Li, if you''re ok with this change I''m adding it > (with the 2/2 patch) to my local queue of rc patches for Chris. >Thanks for doing this! -- Li Zefan> > david > > (the rest of the patch was done within the error handling series) > >> @@ -449,8 +446,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) >> * >> * the range [start, end] is inclusive. >> * >> - * This takes the tree lock, and returns < 0 on error, > 0 if any of the >> - * bits were already set, or zero if none of the bits were already set. >> + * This takes the tree lock, and returns < 0 on error. >> */ >> int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, >> int bits, int wake, int delete, >> @@ -464,7 +460,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, >> struct rb_node *node; >> u64 last_end; >> int err; >> - int set = 0; >> int clear = 0; >> >> if (delete) >> @@ -547,7 +542,7 @@ hit_next: >> if (err) >> goto out; >> if (state->end <= end) { >> - set |= clear_state_bit(tree, state, &bits, wake); >> + clear_state_bit(tree, state, &bits, wake); >> if (last_end == (u64)-1) >> goto out; >> start = last_end + 1; >> @@ -568,13 +563,13 @@ hit_next: >> if (wake) >> wake_up(&state->wq); >> >> - set |= clear_state_bit(tree, prealloc, &bits, wake); >> + clear_state_bit(tree, prealloc, &bits, wake); >> >> prealloc = NULL; >> goto out; >> } >> >> - set |= clear_state_bit(tree, state, &bits, wake); >> + clear_state_bit(tree, state, &bits, wake); >> next: >> if (last_end == (u64)-1) >> goto out; >> @@ -591,7 +586,7 @@ out: >> if (prealloc) >> free_extent_state(prealloc); >> >> - return set; >> + return 0; >> >> search_again: >> if (start > end) >> -- 1.7.3.1-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html