Wang Shilong
2013-Dec-04 13:15 UTC
[PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers
We came across a race condition when scrubbing superblocks. The story is: when committing a transaction, we update @last_trans_committed after writing the superblocks; if a scrubber starts after the superblocks are written but before @last_trans_committed is updated, a generation mismatch happens! We fix this by checking @scrub_pause_req, so that we won't start a scrubber until the transaction commit is finished (i.e. after btrfs_scrub_continue() has finished). Reported-by: Sebastian Ochmann <ochmann@informatik.uni-bonn.de> Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com> Reviewed-by: Miao Xie <miaox@cn.fujitsu.com> --- v3->v4: by checking @scrub_pause_req, block a scrubber if we are committing transaction(thanks to Miao and Liu) --- fs/btrfs/scrub.c | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2544805..d27f95e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -257,6 +257,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, int mirror_num, u64 physical_for_dev_replace); static void copy_nocow_pages_worker(struct btrfs_work *work); +static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_pending_bio_inc(struct scrub_ctx *sctx) @@ -270,6 +271,16 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx) wake_up(&sctx->list_wait); } +static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) +{ + while (atomic_read(&fs_info->scrub_pause_req)) { + mutex_unlock(&fs_info->scrub_lock); + wait_event(fs_info->scrub_pause_wait, + atomic_read(&fs_info->scrub_pause_req) == 0); + mutex_lock(&fs_info->scrub_lock); + } +} + /* * used for workers that require transaction commits (i.e., for the * NOCOW case) @@ -2330,14 +2341,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, btrfs_reada_wait(reada2); mutex_lock(&fs_info->scrub_lock); - while 
(atomic_read(&fs_info->scrub_pause_req)) { - mutex_unlock(&fs_info->scrub_lock); - wait_event(fs_info->scrub_pause_wait, - atomic_read(&fs_info->scrub_pause_req) == 0); - mutex_lock(&fs_info->scrub_lock); - } + scrub_blocked_if_needed(fs_info); atomic_dec(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); + wake_up(&fs_info->scrub_pause_wait); /* @@ -2377,15 +2384,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, atomic_set(&sctx->wr_ctx.flush_all_writes, 0); atomic_inc(&fs_info->scrubs_paused); wake_up(&fs_info->scrub_pause_wait); + mutex_lock(&fs_info->scrub_lock); - while (atomic_read(&fs_info->scrub_pause_req)) { - mutex_unlock(&fs_info->scrub_lock); - wait_event(fs_info->scrub_pause_wait, - atomic_read(&fs_info->scrub_pause_req) == 0); - mutex_lock(&fs_info->scrub_lock); - } + scrub_blocked_if_needed(fs_info); atomic_dec(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); + wake_up(&fs_info->scrub_pause_wait); } @@ -2707,14 +2711,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, atomic_read(&sctx->workers_pending) == 0); mutex_lock(&fs_info->scrub_lock); - while (atomic_read(&fs_info->scrub_pause_req)) { - mutex_unlock(&fs_info->scrub_lock); - wait_event(fs_info->scrub_pause_wait, - atomic_read(&fs_info->scrub_pause_req) == 0); - mutex_lock(&fs_info->scrub_lock); - } + scrub_blocked_if_needed(fs_info); atomic_dec(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); + wake_up(&fs_info->scrub_pause_wait); btrfs_put_block_group(cache); @@ -2926,7 +2926,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, } sctx->readonly = readonly; dev->scrub_device = sctx; + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + /* + * checking @scrub_pause_req here, we can avoid + * race between committing transaction and scrubbing. 
+ */ + scrub_blocked_if_needed(fs_info); atomic_inc(&fs_info->scrubs_running); mutex_unlock(&fs_info->scrub_lock); @@ -2935,9 +2941,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, * by holding device list mutex, we can * kick off writing super in log tree sync. */ + mutex_lock(&fs_info->fs_devices->device_list_mutex); ret = scrub_supers(sctx, dev); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); } - mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (!ret) ret = scrub_enumerate_chunks(sctx, dev, start, end, -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sebastian Ochmann
2013-Dec-04 22:05 UTC
Re: [PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers
Hello, it seems to be working for me (I only tested with both parts of the patch applied); I wasn't able to trigger the errors after almost an hour of stress-testing. Best regards, Sebastian On 04.12.2013 14:15, Wang Shilong wrote: > We came a race condition when scrubbing superblocks, the story is: > > In commiting transaction, we will update @last_trans_commited after > writting superblocks, if scrubber start after writting superblocks > and before updating @last_trans_commited, generation mismatch happens! > > We fix this by checking @scrub_pause_req, and we won't start a srubber > until commiting transaction is finished.(after btrfs_scrub_continue() > finished.) > > Reported-by: Sebastian Ochmann <ochmann@informatik.uni-bonn.de> > Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com> > Reviewed-by: Miao Xie <miaox@cn.fujitsu.com> > --- > v3->v4: > by checking @scrub_pause_req, block a scrubber > if we are committing transaction(thanks to Miao and Liu) > --- > fs/btrfs/scrub.c | 45 ++++++++++++++++++++++++++------------------- > 1 file changed, 26 insertions(+), 19 deletions(-) > > diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c > index 2544805..d27f95e 100644 > --- a/fs/btrfs/scrub.c > +++ b/fs/btrfs/scrub.c > @@ -257,6 +257,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, > static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, > int mirror_num, u64 physical_for_dev_replace); > static void copy_nocow_pages_worker(struct btrfs_work *work); > +static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); > > > static void scrub_pending_bio_inc(struct scrub_ctx *sctx) > @@ -270,6 +271,16 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx) > wake_up(&sctx->list_wait); > } > > +static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) > +{ > + while (atomic_read(&fs_info->scrub_pause_req)) { > + mutex_unlock(&fs_info->scrub_lock); > + wait_event(fs_info->scrub_pause_wait, > + 
atomic_read(&fs_info->scrub_pause_req) == 0); > + mutex_lock(&fs_info->scrub_lock); > + } > +} > + > /* > * used for workers that require transaction commits (i.e., for the > * NOCOW case) > @@ -2330,14 +2341,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, > btrfs_reada_wait(reada2); > > mutex_lock(&fs_info->scrub_lock); > - while (atomic_read(&fs_info->scrub_pause_req)) { > - mutex_unlock(&fs_info->scrub_lock); > - wait_event(fs_info->scrub_pause_wait, > - atomic_read(&fs_info->scrub_pause_req) == 0); > - mutex_lock(&fs_info->scrub_lock); > - } > + scrub_blocked_if_needed(fs_info); > atomic_dec(&fs_info->scrubs_paused); > mutex_unlock(&fs_info->scrub_lock); > + > wake_up(&fs_info->scrub_pause_wait); > > /* > @@ -2377,15 +2384,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, > atomic_set(&sctx->wr_ctx.flush_all_writes, 0); > atomic_inc(&fs_info->scrubs_paused); > wake_up(&fs_info->scrub_pause_wait); > + > mutex_lock(&fs_info->scrub_lock); > - while (atomic_read(&fs_info->scrub_pause_req)) { > - mutex_unlock(&fs_info->scrub_lock); > - wait_event(fs_info->scrub_pause_wait, > - atomic_read(&fs_info->scrub_pause_req) == 0); > - mutex_lock(&fs_info->scrub_lock); > - } > + scrub_blocked_if_needed(fs_info); > atomic_dec(&fs_info->scrubs_paused); > mutex_unlock(&fs_info->scrub_lock); > + > wake_up(&fs_info->scrub_pause_wait); > } > > @@ -2707,14 +2711,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, > atomic_read(&sctx->workers_pending) == 0); > > mutex_lock(&fs_info->scrub_lock); > - while (atomic_read(&fs_info->scrub_pause_req)) { > - mutex_unlock(&fs_info->scrub_lock); > - wait_event(fs_info->scrub_pause_wait, > - atomic_read(&fs_info->scrub_pause_req) == 0); > - mutex_lock(&fs_info->scrub_lock); > - } > + scrub_blocked_if_needed(fs_info); > atomic_dec(&fs_info->scrubs_paused); > mutex_unlock(&fs_info->scrub_lock); > + > wake_up(&fs_info->scrub_pause_wait); > > btrfs_put_block_group(cache); > @@ -2926,7 +2926,13 
@@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, > } > sctx->readonly = readonly; > dev->scrub_device = sctx; > + mutex_unlock(&fs_info->fs_devices->device_list_mutex); > > + /* > + * checking @scrub_pause_req here, we can avoid > + * race between committing transaction and scrubbing. > + */ > + scrub_blocked_if_needed(fs_info); > atomic_inc(&fs_info->scrubs_running); > mutex_unlock(&fs_info->scrub_lock); > > @@ -2935,9 +2941,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, > * by holding device list mutex, we can > * kick off writing super in log tree sync. > */ > + mutex_lock(&fs_info->fs_devices->device_list_mutex); > ret = scrub_supers(sctx, dev); > + mutex_unlock(&fs_info->fs_devices->device_list_mutex); > } > - mutex_unlock(&fs_info->fs_devices->device_list_mutex); > > if (!ret) > ret = scrub_enumerate_chunks(sctx, dev, start, end, >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html