thr3ads.net - Btrfs devel - [PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers [Dec 2013]

If this information is useful, please help other people find it:
Share via:

Wang Shilong

2013-Dec-04 13:15 UTC

[PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers

We came across a race condition when scrubbing superblocks. The story is:

When committing a transaction, we update @last_trans_committed after
writing the superblocks. If a scrubber starts after the superblocks are
written but before @last_trans_committed is updated, a generation
mismatch happens!

We fix this by checking @scrub_pause_req, so that we won't start a scrubber
until the transaction commit has finished (i.e. after
btrfs_scrub_continue() has finished).

Reported-by: Sebastian Ochmann <ochmann@informatik.uni-bonn.de>
Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
Reviewed-by: Miao Xie <miaox@cn.fujitsu.com>
---
v3->v4:
	by checking @scrub_pause_req, block a scrubber
	if we are committing a transaction (thanks to Miao and Liu)
---
 fs/btrfs/scrub.c | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2544805..d27f95e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -257,6 +257,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 			    int mirror_num, u64 physical_for_dev_replace);
 static void copy_nocow_pages_worker(struct btrfs_work *work);
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -270,6 +271,16 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
 	wake_up(&sctx->list_wait);
 }
 
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+	while (atomic_read(&fs_info->scrub_pause_req)) {
+		mutex_unlock(&fs_info->scrub_lock);
+		wait_event(fs_info->scrub_pause_wait,
+		   atomic_read(&fs_info->scrub_pause_req) == 0);
+		mutex_lock(&fs_info->scrub_lock);
+	}
+}
+
 /*
  * used for workers that require transaction commits (i.e., for the
  * NOCOW case)
@@ -2330,14 +2341,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		btrfs_reada_wait(reada2);
 
 	mutex_lock(&fs_info->scrub_lock);
-	while (atomic_read(&fs_info->scrub_pause_req)) {
-		mutex_unlock(&fs_info->scrub_lock);
-		wait_event(fs_info->scrub_pause_wait,
-		   atomic_read(&fs_info->scrub_pause_req) == 0);
-		mutex_lock(&fs_info->scrub_lock);
-	}
+	scrub_blocked_if_needed(fs_info);
 	atomic_dec(&fs_info->scrubs_paused);
 	mutex_unlock(&fs_info->scrub_lock);
+
 	wake_up(&fs_info->scrub_pause_wait);
 
 	/*
@@ -2377,15 +2384,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
 			atomic_inc(&fs_info->scrubs_paused);
 			wake_up(&fs_info->scrub_pause_wait);
+
 			mutex_lock(&fs_info->scrub_lock);
-			while (atomic_read(&fs_info->scrub_pause_req)) {
-				mutex_unlock(&fs_info->scrub_lock);
-				wait_event(fs_info->scrub_pause_wait,
-				   atomic_read(&fs_info->scrub_pause_req) == 0);
-				mutex_lock(&fs_info->scrub_lock);
-			}
+			scrub_blocked_if_needed(fs_info);
 			atomic_dec(&fs_info->scrubs_paused);
 			mutex_unlock(&fs_info->scrub_lock);
+
 			wake_up(&fs_info->scrub_pause_wait);
 		}
 
@@ -2707,14 +2711,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 			   atomic_read(&sctx->workers_pending) == 0);
 
 		mutex_lock(&fs_info->scrub_lock);
-		while (atomic_read(&fs_info->scrub_pause_req)) {
-			mutex_unlock(&fs_info->scrub_lock);
-			wait_event(fs_info->scrub_pause_wait,
-			   atomic_read(&fs_info->scrub_pause_req) == 0);
-			mutex_lock(&fs_info->scrub_lock);
-		}
+		scrub_blocked_if_needed(fs_info);
 		atomic_dec(&fs_info->scrubs_paused);
 		mutex_unlock(&fs_info->scrub_lock);
+
 		wake_up(&fs_info->scrub_pause_wait);
 
 		btrfs_put_block_group(cache);
@@ -2926,7 +2926,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	}
 	sctx->readonly = readonly;
 	dev->scrub_device = sctx;
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
+	/*
+	 * checking @scrub_pause_req here, we can avoid
+	 * race between committing transaction and scrubbing.
+	 */
+	scrub_blocked_if_needed(fs_info);
 	atomic_inc(&fs_info->scrubs_running);
 	mutex_unlock(&fs_info->scrub_lock);
 
@@ -2935,9 +2941,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 		 * by holding device list mutex, we can
 		 * kick off writing super in log tree sync.
 		 */
+		mutex_lock(&fs_info->fs_devices->device_list_mutex);
 		ret = scrub_supers(sctx, dev);
+		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 	}
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	if (!ret)
 		ret = scrub_enumerate_chunks(sctx, dev, start, end,
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Sebastian Ochmann

2013-Dec-04 22:05 UTC

head link

Re: [PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers

Hello,

This seems to be working for me (only tested using both parts of the patch);
I wasn't able to trigger the errors after almost an hour of stress-testing.

Best regards,
Sebastian

On 04.12.2013 14:15, Wang Shilong wrote:
> We came a race condition when scrubbing superblocks, the story is:
>
> In commiting transaction, we will update @last_trans_commited after
> writting superblocks, if scrubber start after writting superblocks
> and before updating @last_trans_commited, generation mismatch happens!
>
> We fix this by checking @scrub_pause_req, and we won't start a srubber
> until commiting transaction is finished.(after btrfs_scrub_continue()
> finished.)
>
> Reported-by: Sebastian Ochmann <ochmann@informatik.uni-bonn.de>
> Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
> Reviewed-by: Miao Xie <miaox@cn.fujitsu.com>
> ---
> v3->v4:
> 	by checking @scrub_pause_req, block a scrubber
> 	if we are committing transaction(thanks to Miao and Liu)
> ---
>   fs/btrfs/scrub.c | 45 ++++++++++++++++++++++++++-------------------
>   1 file changed, 26 insertions(+), 19 deletions(-)
>
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index 2544805..d27f95e 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -257,6 +257,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64
offset, u64 root,
>   static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
>   			    int mirror_num, u64 physical_for_dev_replace);
>   static void copy_nocow_pages_worker(struct btrfs_work *work);
> +static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
>
>
>   static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
> @@ -270,6 +271,16 @@ static void scrub_pending_bio_dec(struct scrub_ctx
*sctx)
>   	wake_up(&sctx->list_wait);
>   }
>
> +static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
> +{
> +	while (atomic_read(&fs_info->scrub_pause_req)) {
> +		mutex_unlock(&fs_info->scrub_lock);
> +		wait_event(fs_info->scrub_pause_wait,
> +		   atomic_read(&fs_info->scrub_pause_req) == 0);
> +		mutex_lock(&fs_info->scrub_lock);
> +	}
> +}
> +
>   /*
>    * used for workers that require transaction commits (i.e., for the
>    * NOCOW case)
> @@ -2330,14 +2341,10 @@ static noinline_for_stack int scrub_stripe(struct
scrub_ctx *sctx,
>   		btrfs_reada_wait(reada2);
>
>   	mutex_lock(&fs_info->scrub_lock);
> -	while (atomic_read(&fs_info->scrub_pause_req)) {
> -		mutex_unlock(&fs_info->scrub_lock);
> -		wait_event(fs_info->scrub_pause_wait,
> -		   atomic_read(&fs_info->scrub_pause_req) == 0);
> -		mutex_lock(&fs_info->scrub_lock);
> -	}
> +	scrub_blocked_if_needed(fs_info);
>   	atomic_dec(&fs_info->scrubs_paused);
>   	mutex_unlock(&fs_info->scrub_lock);
> +
>   	wake_up(&fs_info->scrub_pause_wait);
>
>   	/*
> @@ -2377,15 +2384,12 @@ static noinline_for_stack int scrub_stripe(struct
scrub_ctx *sctx,
>   			atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
>   			atomic_inc(&fs_info->scrubs_paused);
>   			wake_up(&fs_info->scrub_pause_wait);
> +
>   			mutex_lock(&fs_info->scrub_lock);
> -			while (atomic_read(&fs_info->scrub_pause_req)) {
> -				mutex_unlock(&fs_info->scrub_lock);
> -				wait_event(fs_info->scrub_pause_wait,
> -				   atomic_read(&fs_info->scrub_pause_req) == 0);
> -				mutex_lock(&fs_info->scrub_lock);
> -			}
> +			scrub_blocked_if_needed(fs_info);
>   			atomic_dec(&fs_info->scrubs_paused);
>   			mutex_unlock(&fs_info->scrub_lock);
> +
>   			wake_up(&fs_info->scrub_pause_wait);
>   		}
>
> @@ -2707,14 +2711,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
>   			   atomic_read(&sctx->workers_pending) == 0);
>
>   		mutex_lock(&fs_info->scrub_lock);
> -		while (atomic_read(&fs_info->scrub_pause_req)) {
> -			mutex_unlock(&fs_info->scrub_lock);
> -			wait_event(fs_info->scrub_pause_wait,
> -			   atomic_read(&fs_info->scrub_pause_req) == 0);
> -			mutex_lock(&fs_info->scrub_lock);
> -		}
> +		scrub_blocked_if_needed(fs_info);
>   		atomic_dec(&fs_info->scrubs_paused);
>   		mutex_unlock(&fs_info->scrub_lock);
> +
>   		wake_up(&fs_info->scrub_pause_wait);
>
>   		btrfs_put_block_group(cache);
> @@ -2926,7 +2926,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info,
u64 devid, u64 start,
>   	}
>   	sctx->readonly = readonly;
>   	dev->scrub_device = sctx;
> +	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>
> +	/*
> +	 * checking @scrub_pause_req here, we can avoid
> +	 * race between committing transaction and scrubbing.
> +	 */
> +	scrub_blocked_if_needed(fs_info);
>   	atomic_inc(&fs_info->scrubs_running);
>   	mutex_unlock(&fs_info->scrub_lock);
>
> @@ -2935,9 +2941,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info,
u64 devid, u64 start,
>   		 * by holding device list mutex, we can
>   		 * kick off writing super in log tree sync.
>   		 */
> +		mutex_lock(&fs_info->fs_devices->device_list_mutex);
>   		ret = scrub_supers(sctx, dev);
> +		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>   	}
> -	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>
>   	if (!ret)
>   		ret = scrub_enumerate_chunks(sctx, dev, start, end,
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Btrfs devel - Dec 2013 - [PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers

[PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers

Re: [PATCH v4 1/2] Btrfs: fix wrong super generation mismatch when scrubbing supers